diff --git a/.appveyor.yml b/.appveyor.yml index 31a726934..d2cac8213 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -12,14 +12,16 @@ environment: CYG_SETUP: setup-x86.exe BASH: C:\cygwin\bin\bash CC: gcc - - MSYSTEM: MINGW64 - MSYS_CACHE: C:\msys64\var\cache\pacman\pkg - BASH: C:\msys64\usr\bin\bash - CC: gcc - - MSYSTEM: MINGW32 - MSYS_CACHE: C:\msys64\var\cache\pacman\pkg - BASH: C:\msys64\usr\bin\bash - CC: gcc + # Disable MINGW test. It seems AppVeyor is no longer supporting MSYS2. Not a hashcat problem. + # See BUILD_MSYS2.md for local test + #- MSYSTEM: MINGW64 + # MSYS_CACHE: C:\msys64\var\cache\pacman\pkg + # BASH: C:\msys64\usr\bin\bash + # CC: gcc + #- MSYSTEM: MINGW32 + # MSYS_CACHE: C:\msys64\var\cache\pacman\pkg + # BASH: C:\msys64\usr\bin\bash + # CC: gcc # if we have too many commits at the same time, we might need to download more than just the last commit for appveyor to succeed # otherwise we get the error: "fatal: reference is not a tree " @@ -33,14 +35,15 @@ install: - ps: if (Test-Path Env:\CYG_ROOT) { Start-FileDownload "https://cygwin.com/$env:CYG_SETUP" -FileName "$env:CYG_SETUP" } - if defined CYG_ROOT (%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages "%CYG_PACKAGES%" --upgrade-also) # (temporary?) problem with msys/pacman/objc/ada (see https://github.com/msys2/msys2/wiki/FAQ) - - if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc") + #- if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc") # temporary fix for MSYS revoked/new signing keys: - - if defined MSYSTEM (%BASH% -lc "curl https://pastebin.com/raw/e0y4Ky9U | bash") - - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "curl https://pastebin.com/raw/e0y4Ky9U | bash") + #- if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") # the following line is not a duplicate line: # it is necessary to upgrade the MSYS base files and after that all the packages # the 2 separate commands/lines are required because a new shell is necessary for each step - - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "pacman -S --needed --noconfirm git make gcc libiconv-devel") build_script: - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && make") diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..1f017f2ac --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,57 @@ +--- +name: Bug report +about: Something is not working as expected +title: '' +labels: bug +assignees: '' + +--- + +**GitHub is for bugs and features - not for support** +For support, please use the hashcat forums https://hashcat.net/forum/ + +**Check the FAQ** +Some items that might appear to be issues are not issues. Please review the hashcat FAQ https://hashcat.net/wiki/doku.php?id=frequently_asked_questions before submitting a bug report. + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Please provide us with all files required to reproduce the bug locally on our development systems. For instance: hash files, wordlists, rule files, ... + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Hardware/Compute device (please complete the following information):** +- Compute device name: [e.g. RTX2080Ti] +- OpenCL/CUDA driver name: [e.g. NVIDIA DRIVER] +- OpenCL/CUDA driver version: [e.g. 465.21] +- OpenCL/CUDA driver source: [e.g. runtime installer/.exe installer] + +**Hashcat version (please complete the following information):** + - OS: [e.g. Linux] + - Distribution: [e.g. Ubuntu 18.04] + - Version: [e.g. 6.2.0] + +**Diagnostic output compute devices:** + + +``` +For NV: Post nvidia-smi output. This tool also exist on Windows +For AMD ROCm: Post rocm-smi and rocminfo output +``` + +``` +Post clinfo output +``` + +``` +Post hashcat -I output +``` + +``` +On Linux: post lspci output +``` + +**Additional context** +Add any other context about the problem here. For instance, it was working with hashcat version X.X.X (also please post output from older versions). diff --git a/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md b/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md new file mode 100644 index 000000000..c6a99220b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md @@ -0,0 +1,32 @@ +--- +name: Feature Request - Add new algorithm +about: Add support for new hash-mode (-m) +title: '' +labels: new algorithm +assignees: '' + +--- + +**Is your request based on a publicly know cryptographic schema and where can we find information about?** +For instance: +- The hashing function XY is a public known hash algorithm on Wikipedia. +- The password protection for the application XY was made by public by security researcher XY and there's a paper called ... +- The cryptographic schema used in protocol XY can be found in the OSS code on it's GitHub repository. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. For instance, other standalone crackers which already support this algorithm. + +**Provide an example hash/database/file and a known correct password** +The most easy way to produce this is to simply generate a new one using the application itself. + +**Explain how the cryptographic schema is implemented** +The best way is with a proof of concept code in any computer language or pseudo code you like. + +**Describe the known limitations of the algorithm** +Many applications limit the password and/or salt to a specific minimum and maximum length. + +**Where do you typically find the algorithm** +In case you are requesting a cryptographic primitive, name some of the Applications/OS in which it is being used. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/BUILD_CYGWIN.md b/BUILD_CYGWIN.md index 1a867b389..13b1d2c02 100644 --- a/BUILD_CYGWIN.md +++ b/BUILD_CYGWIN.md @@ -1,6 +1,6 @@ # Compiling hashcat with Cygwin. -Tested on a Windows 7 SP1 x64 machine. +Tested on a Windows 10 20H2 x64 machine. ### Installation ### @@ -11,6 +11,7 @@ Make sure to install additional dependencies necessary for hashcat compilation b ``` libiconv-devel gcc-core +gcc-g++ make git ``` diff --git a/BUILD_MSYS2.md b/BUILD_MSYS2.md index d3b43b9ba..09c7b4f1a 100644 --- a/BUILD_MSYS2.md +++ b/BUILD_MSYS2.md @@ -1,6 +1,6 @@ # Compiling hashcat with msys2. -Tested on a Windows 7 SP1 x64 machine. +Tested on a Windows 10 20H2 x64 machine. ### Installation ### diff --git a/BUILD_WSL.md b/BUILD_WSL.md index 814eaa326..115c3772b 100644 --- a/BUILD_WSL.md +++ b/BUILD_WSL.md @@ -2,13 +2,17 @@ Tested on Windows 10 x64, should also work to build hashcat for Windows on Linux. +I had it tested with WSL2 using Ubuntu_2004.2020.424.0_x64.appx. + +Make sure to have the system upgraded after install (otherwise it will fail to find the gcc-mingw-w64-x86-64 package). + ### Installation ### Enable WSL. Press the win + r key on your keyboard simultaneously and in the "Run" popup window type bash and make sure to install additional dependencies necessary for hashcat compilation ``` -sudo apt install gcc-mingw-w64-x86-64 make git +sudo apt install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 make git git clone https://github.com/hashcat/hashcat git clone https://github.com/win-iconv/win-iconv cd win-iconv/ @@ -33,4 +37,4 @@ cd "C:\Users\user\hashcat" and start hashcat by typing ``` hashcat.exe -``` \ No newline at end of file +``` diff --git a/OpenCL/inc_cipher_aes-gcm.cl b/OpenCL/inc_cipher_aes-gcm.cl new file mode 100644 index 000000000..5ea054fb3 --- /dev/null +++ b/OpenCL/inc_cipher_aes-gcm.cl @@ -0,0 +1,306 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.h" +#include "inc_common.h" +#include "inc_cipher_aes.h" +#include "inc_cipher_aes-gcm.h" + +DECLSPEC void AES_GCM_inc32 (u32 *block) +{ + block[3] += 1; +} + +DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src) +{ + dst[0] ^= src[0]; + dst[1] ^= src[1]; + dst[2] ^= src[2]; + dst[3] ^= src[3]; +} + +DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z) +{ + z[0] = 0; + z[1] = 0; + z[2] = 0; + z[3] = 0; + + u32 t[4]; + + t[0] = y[0]; + t[1] = y[1]; + t[2] = y[2]; + t[3] = y[3]; + + for (int i = 0; i < 4; i++) + { + const u32 tv = x[i]; + + for (int j = 0; j < 32; j++) + { + if ((tv >> (31 - j)) & 1) + { + z[0] ^= t[0]; + z[1] ^= t[1]; + z[2] ^= t[2]; + z[3] ^= t[3]; + } + + const int m = t[3] & 1; // save lost bit + + t[3] = (t[2] << 31) | (t[3] >> 1); + t[2] = (t[1] << 31) | (t[2] >> 1); + t[1] = (t[0] << 31) | (t[1] >> 1); + t[0] = 0 | (t[0] >> 1); + + t[0] ^= m * 0xe1000000; + } + } +} + +DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, int in_len, u32 *out) +{ + int i; + int j; + + for (i = 0, j = 0; i < in_len - 15; i += 16, j += 4) + { + u32 t2[4]; + + t2[0] = in[j + 0]; + t2[1] = in[j + 1]; + t2[2] = in[j + 2]; + t2[3] = in[j + 3]; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } + + const int left = in_len - i; + + if (left > 0) + { + u32 t2[4]; + + t2[0] = (left > 0) ? in[j + 0] : 0; + t2[1] = (left > 4) ? in[j + 1] : 0; + t2[2] = (left > 8) ? in[j + 2] : 0; + t2[3] = (left > 12) ? in[j + 3] : 0; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } +} + +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, int in_len, u32 *out) +{ + int i; + int j; + + for (i = 0, j = 0; i < in_len - 15; i += 16, j += 4) + { + u32 t2[4]; + + t2[0] = in[j + 0]; + t2[1] = in[j + 1]; + t2[2] = in[j + 2]; + t2[3] = in[j + 3]; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } + + const int left = in_len - i; + + if (left > 0) + { + u32 t2[4]; + + t2[0] = (left > 0) ? in[j + 0] : 0; + t2[1] = (left > 4) ? in[j + 1] : 0; + t2[2] = (left > 8) ? in[j + 2] : 0; + t2[3] = (left > 12) ? in[j + 3] : 0; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } +} + +DECLSPEC void AES_GCM_Init (const u32 *ukey, int key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + if (key_len == 128) + { + AES128_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (key_len == 192) + { + AES192_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (key_len == 256) + { + AES256_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } +} + +DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, int iv_len, const u32 *subkey, u32 *J0) +{ + if (iv_len == 12) + { + J0[0] = iv[0]; + J0[1] = iv[1]; + J0[2] = iv[2]; + J0[3] = 0x00000001; + } + else + { + AES_GCM_gf_mult (iv, subkey, J0); + + u32 len_buf[4] = { 0 }; + + len_buf[3] = iv_len * 8; + + AES_GCM_xor_block (len_buf, J0); + + AES_GCM_gf_mult (len_buf, subkey, J0); + } +} + +DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + const u32 *xpos = in; + + u32 *ypos = out; + + u32 iv_buf[4]; + + iv_buf[0] = iv[0]; + iv_buf[1] = iv[1]; + iv_buf[2] = iv[2]; + iv_buf[3] = iv[3]; + + const int n = in_len / 16; + + for (u32 i = 0; i < n; i++) + { + AES256_encrypt (key, iv_buf, ypos, s_te0, s_te1, s_te2, s_te3, s_te4); + + AES_GCM_xor_block (ypos, xpos); + + xpos += 4; + ypos += 4; + + AES_GCM_inc32 (iv_buf); + } + + // this is not byte accurate but 4-byte accurate. needs fix? + + int last = in + (in_len/4) - xpos; + + if (last) + { + u32 tmp[4] = { 0 }; + + AES256_encrypt (key, iv_buf, tmp, s_te0, s_te1, s_te2, s_te3, s_te4); + + if (last >= 1) *ypos++ = *xpos++ ^ tmp[0]; + if (last >= 2) *ypos++ = *xpos++ ^ tmp[1]; + if (last >= 3) *ypos++ = *xpos++ ^ tmp[2]; + } +} + +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 J0_incr[4]; + + J0_incr[0] = J0[0]; + J0_incr[1] = J0[1]; + J0_incr[2] = J0[2]; + J0_incr[3] = J0[3]; + + AES_GCM_gctr (key, J0_incr, in, in_len, out, s_te0, s_te1, s_te2, s_te3, s_te4); +} + +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, int aad_len, const u32 *enc_buf, int enc_len, u32 *out) +{ + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; + + AES_GCM_ghash (subkey, aad_buf, aad_len, out); + + AES_GCM_ghash (subkey, enc_buf, enc_len, out); + + u32 len_buf[4]; + + len_buf[0] = aad_len * 8; + len_buf[1] = 0; + len_buf[2] = 0; + len_buf[3] = enc_len * 8; + + AES_GCM_ghash (subkey, len_buf, 16, out); +} + +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, int aad_len, GLOBAL_AS const u32 *enc_buf, int enc_len, u32 *out) +{ + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; + + AES_GCM_ghash (subkey, aad_buf, aad_len, out); + + AES_GCM_ghash_global (subkey, enc_buf, enc_len, out); + + u32 len_buf[4]; + + len_buf[0] = aad_len * 8; + len_buf[1] = 0; + len_buf[2] = 0; + len_buf[3] = enc_len * 8; + + AES_GCM_ghash (subkey, len_buf, 16, out); +} diff --git a/OpenCL/inc_cipher_aes-gcm.h b/OpenCL/inc_cipher_aes-gcm.h new file mode 100644 index 000000000..753a4d0c7 --- /dev/null +++ b/OpenCL/inc_cipher_aes-gcm.h @@ -0,0 +1,21 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_CIPHER_AES_GCM_H +#define _INC_CIPHER_AES_GCM_H + +DECLSPEC void AES_GCM_inc32 (u32 *block); +DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src); +DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z); +DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, int in_len, u32 *out); +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, int in_len, u32 *out); +DECLSPEC void AES_GCM_Init (const u32 *ukey, int key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, int iv_len, const u32 *subkey, u32 *J0); +DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, int aad_len, const u32 *enc_buf, int enc_len, u32 *out); +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, int aad_len, GLOBAL_AS const u32 *enc_buf, int enc_len, u32 *out); + +#endif // _INC_CIPHER_AES_GCM_H diff --git a/OpenCL/inc_cipher_serpent.cl b/OpenCL/inc_cipher_serpent.cl index 98632fdef..3924fa584 100644 --- a/OpenCL/inc_cipher_serpent.cl +++ b/OpenCL/inc_cipher_serpent.cl @@ -15,6 +15,7 @@ /* -------------------------------------------------------------------- */ /* */ /* Cleaned and optimized for GPU use with hashcat by Jens Steube */ +/* Added 192-bit functions by Gabriele Gristina */ #include "inc_vendor.h" #include "inc_types.h" @@ -690,6 +691,289 @@ DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out) out[3] = d; } +// 192 bit key + +DECLSPEC void serpent192_set_key (u32 *ks, const u32 *ukey) +{ + ks[ 0] = ukey[0]; + ks[ 1] = ukey[1]; + ks[ 2] = ukey[2]; + ks[ 3] = ukey[3]; + ks[ 4] = ukey[4]; + ks[ 5] = ukey[5]; + ks[ 6] = 1; + ks[ 7] = 0; + ks[ 8] = hc_rotl32_S ((ks[ 7] ^ ks[ 5] ^ ks[ 3] ^ ks[ 0] ^ 0x9e3779b9 ^ 0), 11); + ks[ 9] = hc_rotl32_S ((ks[ 8] ^ ks[ 6] ^ ks[ 4] ^ ks[ 1] ^ 0x9e3779b9 ^ 1), 11); + ks[ 10] = hc_rotl32_S ((ks[ 9] ^ ks[ 7] ^ ks[ 5] ^ ks[ 2] ^ 0x9e3779b9 ^ 2), 11); + ks[ 11] = hc_rotl32_S ((ks[ 10] ^ ks[ 8] ^ ks[ 6] ^ ks[ 3] ^ 0x9e3779b9 ^ 3), 11); + ks[ 12] = hc_rotl32_S ((ks[ 11] ^ ks[ 9] ^ ks[ 7] ^ ks[ 4] ^ 0x9e3779b9 ^ 4), 11); + ks[ 13] = hc_rotl32_S ((ks[ 12] ^ ks[ 10] ^ ks[ 8] ^ ks[ 5] ^ 0x9e3779b9 ^ 5), 11); + ks[ 14] = hc_rotl32_S ((ks[ 13] ^ ks[ 11] ^ ks[ 9] ^ ks[ 6] ^ 0x9e3779b9 ^ 6), 11); + ks[ 15] = hc_rotl32_S ((ks[ 14] ^ ks[ 12] ^ ks[ 10] ^ ks[ 7] ^ 0x9e3779b9 ^ 7), 11); + ks[ 16] = hc_rotl32_S ((ks[ 15] ^ ks[ 13] ^ ks[ 11] ^ ks[ 8] ^ 0x9e3779b9 ^ 8), 11); + ks[ 17] = hc_rotl32_S ((ks[ 16] ^ ks[ 14] ^ ks[ 12] ^ ks[ 9] ^ 0x9e3779b9 ^ 9), 11); + ks[ 18] = hc_rotl32_S ((ks[ 17] ^ ks[ 15] ^ ks[ 13] ^ ks[ 10] ^ 0x9e3779b9 ^ 10), 11); + ks[ 19] = hc_rotl32_S ((ks[ 18] ^ ks[ 16] ^ ks[ 14] ^ ks[ 11] ^ 0x9e3779b9 ^ 11), 11); + ks[ 20] = hc_rotl32_S ((ks[ 19] ^ ks[ 17] ^ ks[ 15] ^ ks[ 12] ^ 0x9e3779b9 ^ 12), 11); + ks[ 21] = hc_rotl32_S ((ks[ 20] ^ ks[ 18] ^ ks[ 16] ^ ks[ 13] ^ 0x9e3779b9 ^ 13), 11); + ks[ 22] = hc_rotl32_S ((ks[ 21] ^ ks[ 19] ^ ks[ 17] ^ ks[ 14] ^ 0x9e3779b9 ^ 14), 11); + ks[ 23] = hc_rotl32_S ((ks[ 22] ^ ks[ 20] ^ ks[ 18] ^ ks[ 15] ^ 0x9e3779b9 ^ 15), 11); + ks[ 24] = hc_rotl32_S ((ks[ 23] ^ ks[ 21] ^ ks[ 19] ^ ks[ 16] ^ 0x9e3779b9 ^ 16), 11); + ks[ 25] = hc_rotl32_S ((ks[ 24] ^ ks[ 22] ^ ks[ 20] ^ ks[ 17] ^ 0x9e3779b9 ^ 17), 11); + ks[ 26] = hc_rotl32_S ((ks[ 25] ^ ks[ 23] ^ ks[ 21] ^ ks[ 18] ^ 0x9e3779b9 ^ 18), 11); + ks[ 27] = hc_rotl32_S ((ks[ 26] ^ ks[ 24] ^ ks[ 22] ^ ks[ 19] ^ 0x9e3779b9 ^ 19), 11); + ks[ 28] = hc_rotl32_S ((ks[ 27] ^ ks[ 25] ^ ks[ 23] ^ ks[ 20] ^ 0x9e3779b9 ^ 20), 11); + ks[ 29] = hc_rotl32_S ((ks[ 28] ^ ks[ 26] ^ ks[ 24] ^ ks[ 21] ^ 0x9e3779b9 ^ 21), 11); + ks[ 30] = hc_rotl32_S ((ks[ 29] ^ ks[ 27] ^ ks[ 25] ^ ks[ 22] ^ 0x9e3779b9 ^ 22), 11); + ks[ 31] = hc_rotl32_S ((ks[ 30] ^ ks[ 28] ^ ks[ 26] ^ ks[ 23] ^ 0x9e3779b9 ^ 23), 11); + ks[ 32] = hc_rotl32_S ((ks[ 31] ^ ks[ 29] ^ ks[ 27] ^ ks[ 24] ^ 0x9e3779b9 ^ 24), 11); + ks[ 33] = hc_rotl32_S ((ks[ 32] ^ ks[ 30] ^ ks[ 28] ^ ks[ 25] ^ 0x9e3779b9 ^ 25), 11); + ks[ 34] = hc_rotl32_S ((ks[ 33] ^ ks[ 31] ^ ks[ 29] ^ ks[ 26] ^ 0x9e3779b9 ^ 26), 11); + ks[ 35] = hc_rotl32_S ((ks[ 34] ^ ks[ 32] ^ ks[ 30] ^ ks[ 27] ^ 0x9e3779b9 ^ 27), 11); + ks[ 36] = hc_rotl32_S ((ks[ 35] ^ ks[ 33] ^ ks[ 31] ^ ks[ 28] ^ 0x9e3779b9 ^ 28), 11); + ks[ 37] = hc_rotl32_S ((ks[ 36] ^ ks[ 34] ^ ks[ 32] ^ ks[ 29] ^ 0x9e3779b9 ^ 29), 11); + ks[ 38] = hc_rotl32_S ((ks[ 37] ^ ks[ 35] ^ ks[ 33] ^ ks[ 30] ^ 0x9e3779b9 ^ 30), 11); + ks[ 39] = hc_rotl32_S ((ks[ 38] ^ ks[ 36] ^ ks[ 34] ^ ks[ 31] ^ 0x9e3779b9 ^ 31), 11); + ks[ 40] = hc_rotl32_S ((ks[ 39] ^ ks[ 37] ^ ks[ 35] ^ ks[ 32] ^ 0x9e3779b9 ^ 32), 11); + ks[ 41] = hc_rotl32_S ((ks[ 40] ^ ks[ 38] ^ ks[ 36] ^ ks[ 33] ^ 0x9e3779b9 ^ 33), 11); + ks[ 42] = hc_rotl32_S ((ks[ 41] ^ ks[ 39] ^ ks[ 37] ^ ks[ 34] ^ 0x9e3779b9 ^ 34), 11); + ks[ 43] = hc_rotl32_S ((ks[ 42] ^ ks[ 40] ^ ks[ 38] ^ ks[ 35] ^ 0x9e3779b9 ^ 35), 11); + ks[ 44] = hc_rotl32_S ((ks[ 43] ^ ks[ 41] ^ ks[ 39] ^ ks[ 36] ^ 0x9e3779b9 ^ 36), 11); + ks[ 45] = hc_rotl32_S ((ks[ 44] ^ ks[ 42] ^ ks[ 40] ^ ks[ 37] ^ 0x9e3779b9 ^ 37), 11); + ks[ 46] = hc_rotl32_S ((ks[ 45] ^ ks[ 43] ^ ks[ 41] ^ ks[ 38] ^ 0x9e3779b9 ^ 38), 11); + ks[ 47] = hc_rotl32_S ((ks[ 46] ^ ks[ 44] ^ ks[ 42] ^ ks[ 39] ^ 0x9e3779b9 ^ 39), 11); + ks[ 48] = hc_rotl32_S ((ks[ 47] ^ ks[ 45] ^ ks[ 43] ^ ks[ 40] ^ 0x9e3779b9 ^ 40), 11); + ks[ 49] = hc_rotl32_S ((ks[ 48] ^ ks[ 46] ^ ks[ 44] ^ ks[ 41] ^ 0x9e3779b9 ^ 41), 11); + ks[ 50] = hc_rotl32_S ((ks[ 49] ^ ks[ 47] ^ ks[ 45] ^ ks[ 42] ^ 0x9e3779b9 ^ 42), 11); + ks[ 51] = hc_rotl32_S ((ks[ 50] ^ ks[ 48] ^ ks[ 46] ^ ks[ 43] ^ 0x9e3779b9 ^ 43), 11); + ks[ 52] = hc_rotl32_S ((ks[ 51] ^ ks[ 49] ^ ks[ 47] ^ ks[ 44] ^ 0x9e3779b9 ^ 44), 11); + ks[ 53] = hc_rotl32_S ((ks[ 52] ^ ks[ 50] ^ ks[ 48] ^ ks[ 45] ^ 0x9e3779b9 ^ 45), 11); + ks[ 54] = hc_rotl32_S ((ks[ 53] ^ ks[ 51] ^ ks[ 49] ^ ks[ 46] ^ 0x9e3779b9 ^ 46), 11); + ks[ 55] = hc_rotl32_S ((ks[ 54] ^ ks[ 52] ^ ks[ 50] ^ ks[ 47] ^ 0x9e3779b9 ^ 47), 11); + ks[ 56] = hc_rotl32_S ((ks[ 55] ^ ks[ 53] ^ ks[ 51] ^ ks[ 48] ^ 0x9e3779b9 ^ 48), 11); + ks[ 57] = hc_rotl32_S ((ks[ 56] ^ ks[ 54] ^ ks[ 52] ^ ks[ 49] ^ 0x9e3779b9 ^ 49), 11); + ks[ 58] = hc_rotl32_S ((ks[ 57] ^ ks[ 55] ^ ks[ 53] ^ ks[ 50] ^ 0x9e3779b9 ^ 50), 11); + ks[ 59] = hc_rotl32_S ((ks[ 58] ^ ks[ 56] ^ ks[ 54] ^ ks[ 51] ^ 0x9e3779b9 ^ 51), 11); + ks[ 60] = hc_rotl32_S ((ks[ 59] ^ ks[ 57] ^ ks[ 55] ^ ks[ 52] ^ 0x9e3779b9 ^ 52), 11); + ks[ 61] = hc_rotl32_S ((ks[ 60] ^ ks[ 58] ^ ks[ 56] ^ ks[ 53] ^ 0x9e3779b9 ^ 53), 11); + ks[ 62] = hc_rotl32_S ((ks[ 61] ^ ks[ 59] ^ ks[ 57] ^ ks[ 54] ^ 0x9e3779b9 ^ 54), 11); + ks[ 63] = hc_rotl32_S ((ks[ 62] ^ ks[ 60] ^ ks[ 58] ^ ks[ 55] ^ 0x9e3779b9 ^ 55), 11); + ks[ 64] = hc_rotl32_S ((ks[ 63] ^ ks[ 61] ^ ks[ 59] ^ ks[ 56] ^ 0x9e3779b9 ^ 56), 11); + ks[ 65] = hc_rotl32_S ((ks[ 64] ^ ks[ 62] ^ ks[ 60] ^ ks[ 57] ^ 0x9e3779b9 ^ 57), 11); + ks[ 66] = hc_rotl32_S ((ks[ 65] ^ ks[ 63] ^ ks[ 61] ^ ks[ 58] ^ 0x9e3779b9 ^ 58), 11); + ks[ 67] = hc_rotl32_S ((ks[ 66] ^ ks[ 64] ^ ks[ 62] ^ ks[ 59] ^ 0x9e3779b9 ^ 59), 11); + ks[ 68] = hc_rotl32_S ((ks[ 67] ^ ks[ 65] ^ ks[ 63] ^ ks[ 60] ^ 0x9e3779b9 ^ 60), 11); + ks[ 69] = hc_rotl32_S ((ks[ 68] ^ ks[ 66] ^ ks[ 64] ^ ks[ 61] ^ 0x9e3779b9 ^ 61), 11); + ks[ 70] = hc_rotl32_S ((ks[ 69] ^ ks[ 67] ^ ks[ 65] ^ ks[ 62] ^ 0x9e3779b9 ^ 62), 11); + ks[ 71] = hc_rotl32_S ((ks[ 70] ^ ks[ 68] ^ ks[ 66] ^ ks[ 63] ^ 0x9e3779b9 ^ 63), 11); + ks[ 72] = hc_rotl32_S ((ks[ 71] ^ ks[ 69] ^ ks[ 67] ^ ks[ 64] ^ 0x9e3779b9 ^ 64), 11); + ks[ 73] = hc_rotl32_S ((ks[ 72] ^ ks[ 70] ^ ks[ 68] ^ ks[ 65] ^ 0x9e3779b9 ^ 65), 11); + ks[ 74] = hc_rotl32_S ((ks[ 73] ^ ks[ 71] ^ ks[ 69] ^ ks[ 66] ^ 0x9e3779b9 ^ 66), 11); + ks[ 75] = hc_rotl32_S ((ks[ 74] ^ ks[ 72] ^ ks[ 70] ^ ks[ 67] ^ 0x9e3779b9 ^ 67), 11); + ks[ 76] = hc_rotl32_S ((ks[ 75] ^ ks[ 73] ^ ks[ 71] ^ ks[ 68] ^ 0x9e3779b9 ^ 68), 11); + ks[ 77] = hc_rotl32_S ((ks[ 76] ^ ks[ 74] ^ ks[ 72] ^ ks[ 69] ^ 0x9e3779b9 ^ 69), 11); + ks[ 78] = hc_rotl32_S ((ks[ 77] ^ ks[ 75] ^ ks[ 73] ^ ks[ 70] ^ 0x9e3779b9 ^ 70), 11); + ks[ 79] = hc_rotl32_S ((ks[ 78] ^ ks[ 76] ^ ks[ 74] ^ ks[ 71] ^ 0x9e3779b9 ^ 71), 11); + ks[ 80] = hc_rotl32_S ((ks[ 79] ^ ks[ 77] ^ ks[ 75] ^ ks[ 72] ^ 0x9e3779b9 ^ 72), 11); + ks[ 81] = hc_rotl32_S ((ks[ 80] ^ ks[ 78] ^ ks[ 76] ^ ks[ 73] ^ 0x9e3779b9 ^ 73), 11); + ks[ 82] = hc_rotl32_S ((ks[ 81] ^ ks[ 79] ^ ks[ 77] ^ ks[ 74] ^ 0x9e3779b9 ^ 74), 11); + ks[ 83] = hc_rotl32_S ((ks[ 82] ^ ks[ 80] ^ ks[ 78] ^ ks[ 75] ^ 0x9e3779b9 ^ 75), 11); + ks[ 84] = hc_rotl32_S ((ks[ 83] ^ ks[ 81] ^ ks[ 79] ^ ks[ 76] ^ 0x9e3779b9 ^ 76), 11); + ks[ 85] = hc_rotl32_S ((ks[ 84] ^ ks[ 82] ^ ks[ 80] ^ ks[ 77] ^ 0x9e3779b9 ^ 77), 11); + ks[ 86] = hc_rotl32_S ((ks[ 85] ^ ks[ 83] ^ ks[ 81] ^ ks[ 78] ^ 0x9e3779b9 ^ 78), 11); + ks[ 87] = hc_rotl32_S ((ks[ 86] ^ ks[ 84] ^ ks[ 82] ^ ks[ 79] ^ 0x9e3779b9 ^ 79), 11); + ks[ 88] = hc_rotl32_S ((ks[ 87] ^ ks[ 85] ^ ks[ 83] ^ ks[ 80] ^ 0x9e3779b9 ^ 80), 11); + ks[ 89] = hc_rotl32_S ((ks[ 88] ^ ks[ 86] ^ ks[ 84] ^ ks[ 81] ^ 0x9e3779b9 ^ 81), 11); + ks[ 90] = hc_rotl32_S ((ks[ 89] ^ ks[ 87] ^ ks[ 85] ^ ks[ 82] ^ 0x9e3779b9 ^ 82), 11); + ks[ 91] = hc_rotl32_S ((ks[ 90] ^ ks[ 88] ^ ks[ 86] ^ ks[ 83] ^ 0x9e3779b9 ^ 83), 11); + ks[ 92] = hc_rotl32_S ((ks[ 91] ^ ks[ 89] ^ ks[ 87] ^ ks[ 84] ^ 0x9e3779b9 ^ 84), 11); + ks[ 93] = hc_rotl32_S ((ks[ 92] ^ ks[ 90] ^ ks[ 88] ^ ks[ 85] ^ 0x9e3779b9 ^ 85), 11); + ks[ 94] = hc_rotl32_S ((ks[ 93] ^ ks[ 91] ^ ks[ 89] ^ ks[ 86] ^ 0x9e3779b9 ^ 86), 11); + ks[ 95] = hc_rotl32_S ((ks[ 94] ^ ks[ 92] ^ ks[ 90] ^ ks[ 87] ^ 0x9e3779b9 ^ 87), 11); + ks[ 96] = hc_rotl32_S ((ks[ 95] ^ ks[ 93] ^ ks[ 91] ^ ks[ 88] ^ 0x9e3779b9 ^ 88), 11); + ks[ 97] = hc_rotl32_S ((ks[ 96] ^ ks[ 94] ^ ks[ 92] ^ ks[ 89] ^ 0x9e3779b9 ^ 89), 11); + ks[ 98] = hc_rotl32_S ((ks[ 97] ^ ks[ 95] ^ ks[ 93] ^ ks[ 90] ^ 0x9e3779b9 ^ 90), 11); + ks[ 99] = hc_rotl32_S ((ks[ 98] ^ ks[ 96] ^ ks[ 94] ^ ks[ 91] ^ 0x9e3779b9 ^ 91), 11); + ks[100] = hc_rotl32_S ((ks[ 99] ^ ks[ 97] ^ ks[ 95] ^ ks[ 92] ^ 0x9e3779b9 ^ 92), 11); + ks[101] = hc_rotl32_S ((ks[100] ^ ks[ 98] ^ ks[ 96] ^ ks[ 93] ^ 0x9e3779b9 ^ 93), 11); + ks[102] = hc_rotl32_S ((ks[101] ^ ks[ 99] ^ ks[ 97] ^ ks[ 94] ^ 0x9e3779b9 ^ 94), 11); + ks[103] = hc_rotl32_S ((ks[102] ^ ks[100] ^ ks[ 98] ^ ks[ 95] ^ 0x9e3779b9 ^ 95), 11); + ks[104] = hc_rotl32_S ((ks[103] ^ ks[101] ^ ks[ 99] ^ ks[ 96] ^ 0x9e3779b9 ^ 96), 11); + ks[105] = hc_rotl32_S ((ks[104] ^ ks[102] ^ ks[100] ^ ks[ 97] ^ 0x9e3779b9 ^ 97), 11); + ks[106] = hc_rotl32_S ((ks[105] ^ ks[103] ^ ks[101] ^ ks[ 98] ^ 0x9e3779b9 ^ 98), 11); + ks[107] = hc_rotl32_S ((ks[106] ^ ks[104] ^ ks[102] ^ ks[ 99] ^ 0x9e3779b9 ^ 99), 11); + ks[108] = hc_rotl32_S ((ks[107] ^ ks[105] ^ ks[103] ^ ks[100] ^ 0x9e3779b9 ^ 100), 11); + ks[109] = hc_rotl32_S ((ks[108] ^ ks[106] ^ ks[104] ^ ks[101] ^ 0x9e3779b9 ^ 101), 11); + ks[110] = hc_rotl32_S ((ks[109] ^ ks[107] ^ ks[105] ^ ks[102] ^ 0x9e3779b9 ^ 102), 11); + ks[111] = hc_rotl32_S ((ks[110] ^ ks[108] ^ ks[106] ^ ks[103] ^ 0x9e3779b9 ^ 103), 11); + ks[112] = hc_rotl32_S ((ks[111] ^ ks[109] ^ ks[107] ^ ks[104] ^ 0x9e3779b9 ^ 104), 11); + ks[113] = hc_rotl32_S ((ks[112] ^ ks[110] ^ ks[108] ^ ks[105] ^ 0x9e3779b9 ^ 105), 11); + ks[114] = hc_rotl32_S ((ks[113] ^ ks[111] ^ ks[109] ^ ks[106] ^ 0x9e3779b9 ^ 106), 11); + ks[115] = hc_rotl32_S ((ks[114] ^ ks[112] ^ ks[110] ^ ks[107] ^ 0x9e3779b9 ^ 107), 11); + ks[116] = hc_rotl32_S ((ks[115] ^ ks[113] ^ ks[111] ^ ks[108] ^ 0x9e3779b9 ^ 108), 11); + ks[117] = hc_rotl32_S ((ks[116] ^ ks[114] ^ ks[112] ^ ks[109] ^ 0x9e3779b9 ^ 109), 11); + ks[118] = hc_rotl32_S ((ks[117] ^ ks[115] ^ ks[113] ^ ks[110] ^ 0x9e3779b9 ^ 110), 11); + ks[119] = hc_rotl32_S ((ks[118] ^ ks[116] ^ ks[114] ^ ks[111] ^ 0x9e3779b9 ^ 111), 11); + ks[120] = hc_rotl32_S ((ks[119] ^ ks[117] ^ ks[115] ^ ks[112] ^ 0x9e3779b9 ^ 112), 11); + ks[121] = hc_rotl32_S ((ks[120] ^ ks[118] ^ ks[116] ^ ks[113] ^ 0x9e3779b9 ^ 113), 11); + ks[122] = hc_rotl32_S ((ks[121] ^ ks[119] ^ ks[117] ^ ks[114] ^ 0x9e3779b9 ^ 114), 11); + ks[123] = hc_rotl32_S ((ks[122] ^ ks[120] ^ ks[118] ^ ks[115] ^ 0x9e3779b9 ^ 115), 11); + ks[124] = hc_rotl32_S ((ks[123] ^ ks[121] ^ ks[119] ^ ks[116] ^ 0x9e3779b9 ^ 116), 11); + ks[125] = hc_rotl32_S ((ks[124] ^ ks[122] ^ ks[120] ^ ks[117] ^ 0x9e3779b9 ^ 117), 11); + ks[126] = hc_rotl32_S ((ks[125] ^ ks[123] ^ ks[121] ^ ks[118] ^ 0x9e3779b9 ^ 118), 11); + ks[127] = hc_rotl32_S ((ks[126] ^ ks[124] ^ ks[122] ^ ks[119] ^ 0x9e3779b9 ^ 119), 11); + ks[128] = hc_rotl32_S ((ks[127] ^ ks[125] ^ ks[123] ^ ks[120] ^ 0x9e3779b9 ^ 120), 11); + ks[129] = hc_rotl32_S ((ks[128] ^ ks[126] ^ ks[124] ^ ks[121] ^ 0x9e3779b9 ^ 121), 11); + ks[130] = hc_rotl32_S ((ks[129] ^ ks[127] ^ ks[125] ^ ks[122] ^ 0x9e3779b9 ^ 122), 11); + ks[131] = hc_rotl32_S ((ks[130] ^ ks[128] ^ ks[126] ^ ks[123] ^ 0x9e3779b9 ^ 123), 11); + ks[132] = hc_rotl32_S ((ks[131] ^ ks[129] ^ ks[127] ^ ks[124] ^ 0x9e3779b9 ^ 124), 11); + ks[133] = hc_rotl32_S ((ks[132] ^ ks[130] ^ ks[128] ^ ks[125] ^ 0x9e3779b9 ^ 125), 11); + ks[134] = hc_rotl32_S ((ks[133] ^ ks[131] ^ ks[129] ^ ks[126] ^ 0x9e3779b9 ^ 126), 11); + ks[135] = hc_rotl32_S ((ks[134] ^ ks[132] ^ ks[130] ^ ks[127] ^ 0x9e3779b9 ^ 127), 11); + ks[136] = hc_rotl32_S ((ks[135] ^ ks[133] ^ ks[131] ^ ks[128] ^ 0x9e3779b9 ^ 128), 11); + ks[137] = hc_rotl32_S ((ks[136] ^ ks[134] ^ ks[132] ^ ks[129] ^ 0x9e3779b9 ^ 129), 11); + ks[138] = hc_rotl32_S ((ks[137] ^ ks[135] ^ ks[133] ^ ks[130] ^ 0x9e3779b9 ^ 130), 11); + ks[139] = hc_rotl32_S ((ks[138] ^ ks[136] ^ ks[134] ^ ks[131] ^ 0x9e3779b9 ^ 131), 11); + + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h); + k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h); + k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h); + k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h); + k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h); + k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h); + k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h); + k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h); + k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h); + k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h); + k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h); + k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h); + k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h); + k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h); + k_set(14,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h); + k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h); + k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h); + k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h); + k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h); + k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h); + k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h); + k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h); + k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h); + k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h); + k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h); + k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h); + k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h); + k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h); + k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h); + k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h); + k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h); + k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h); + k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h); +} + +DECLSPEC void serpent192_encrypt (const u32 *ks, const u32 *in, u32 *out) +{ + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + a = in[0]; + b = in[1]; + c = in[2]; + d = in[3]; + + k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d); + k_xor(32,a,b,c,d); + + out[0] = a; + out[1] = b; + out[2] = c; + out[3] = d; +} + +DECLSPEC void serpent192_decrypt (const u32 *ks, const u32 *in, u32 *out) +{ + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + a = in[0]; + b = in[1]; + c = in[2]; + d = in[3]; + + k_xor(32,a,b,c,d); + ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d); + irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d); + irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d); + irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d); + + out[0] = a; + out[1] = b; + out[2] = c; + out[3] = d; +} + // 256 bit key DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey) diff --git a/OpenCL/inc_cipher_serpent.h b/OpenCL/inc_cipher_serpent.h index 7481f0d10..d6a3243f3 100644 --- a/OpenCL/inc_cipher_serpent.h +++ b/OpenCL/inc_cipher_serpent.h @@ -9,6 +9,9 @@ DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey); DECLSPEC void serpent128_encrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out); +DECLSPEC void serpent192_set_key (u32 *ks, const u32 *ukey); +DECLSPEC void serpent192_encrypt (const u32 *ks, const u32 *in, u32 *out); +DECLSPEC void serpent192_decrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey); DECLSPEC void serpent256_encrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out); diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl index 5ba55e341..fceb6f58b 100644 --- a/OpenCL/inc_cipher_twofish.cl +++ b/OpenCL/inc_cipher_twofish.cl @@ -18,6 +18,7 @@ /* -------------------------------------------------------------------- */ /* */ /* Cleaned and optimized for GPU use with hashcat by Jens Steube */ +/* Added 192-bit functions by Gabriele Gristina */ #include "inc_vendor.h" #include "inc_types.h" @@ -79,7 +80,8 @@ CONSTANT_VK u32a q_tab[2][256] = CONSTANT_VK u32a m_tab[4][256] = { - { 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, + { + 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, @@ -121,9 +123,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, - 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 }, - - { 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, + 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 + }, + { + 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, @@ -165,9 +168,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, - 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 }, - - { 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, + 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 + }, + { + 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, @@ -209,9 +213,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, - 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF }, - - { 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, + 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF + }, + { + 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, @@ -253,7 +258,8 @@ CONSTANT_VK u32a m_tab[4][256] = 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, - 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 } + 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 + } }; #define g1_fun128(x) \ @@ -268,6 +274,31 @@ CONSTANT_VK u32a m_tab[4][256] = mds (2, q22 (unpack_v8c_from_v32_S (x), sk)) ^ \ mds (3, q23 (unpack_v8d_from_v32_S (x), sk))) +#define g1_fun192(x) \ + (mds (0, q30 (unpack_v8d_from_v32_S (x), sk)) ^ \ + mds (1, q31 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (2, q32 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (3, q33 (unpack_v8c_from_v32_S (x), sk))) + +#define g0_fun192(x) \ + (mds (0, q30 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (1, q31 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (2, q32 (unpack_v8c_from_v32_S (x), sk)) ^ \ + mds (3, q33 (unpack_v8d_from_v32_S (x), sk))) + +#define g1_fun256(x) \ + (mds (0, q40 (unpack_v8d_from_v32_S (x), sk)) ^ \ + mds (1, q41 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (2, q42 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (3, q43 (unpack_v8c_from_v32_S (x), sk))) + +#define g0_fun256(x) \ + (mds (0, q40 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (1, q41 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (2, q42 (unpack_v8c_from_v32_S (x), sk)) ^ \ + mds (3, q43 (unpack_v8d_from_v32_S (x), sk))) + + #define f_rnd128(i) \ { \ const u32 t0 = g0_fun128 (data[0]); \ @@ -292,6 +323,30 @@ CONSTANT_VK u32a m_tab[4][256] = data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ } +#define f_rnd192(i) \ +{ \ + const u32 t0 = g0_fun192 (data[0]); \ + const u32 t1 = g1_fun192 (data[1]); \ + data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ + data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ + const u32 t2 = g0_fun192 (data[2]); \ + const u32 t3 = g1_fun192 (data[3]); \ + data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ + data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ +} + +#define i_rnd192(i) \ +{ \ + const u32 t0 = g0_fun192 (data[0]); \ + const u32 t1 = g1_fun192 (data[1]); \ + data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ + data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ + const u32 t2 = g0_fun192 (data[2]); \ + const u32 t3 = g1_fun192 (data[3]); \ + data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ + data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ +} + #define f_rnd256(i) \ { \ const u32 t0 = g0_fun256 (data[0]); \ @@ -325,6 +380,11 @@ CONSTANT_VK u32a m_tab[4][256] = #define q22(x,k) q (1, q (0, x) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) #define q23(x,k) q (1, q (1, x) ^ unpack_v8d_from_v32_S (k[1])) ^ unpack_v8d_from_v32_S (k[0]) +#define q30(x,k) q (0, q (0, q (1, x) ^ unpack_v8a_from_v32_S (k[2])) ^ unpack_v8a_from_v32_S (k[1])) ^ unpack_v8a_from_v32_S (k[0]) +#define q31(x,k) q (0, q (1, q (1, x) ^ unpack_v8b_from_v32_S (k[2])) ^ unpack_v8b_from_v32_S (k[1])) ^ unpack_v8b_from_v32_S (k[0]) +#define q32(x,k) q (1, q (0, q (0, x) ^ unpack_v8c_from_v32_S (k[2])) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) +#define q33(x,k) q (1, q (1, q (0, x) ^ unpack_v8d_from_v32_S (k[2])) ^ unpack_v8d_from_v32_S (k[1])) ^ unpack_v8d_from_v32_S (k[0]) + #define q40(x,k) q (0, q (0, q (1, q (1, x) ^ unpack_v8a_from_v32_S (k[3])) ^ unpack_v8a_from_v32_S (k[2])) ^ unpack_v8a_from_v32_S (k[1])) ^ unpack_v8a_from_v32_S (k[0]) #define q41(x,k) q (0, q (1, q (1, q (0, x) ^ unpack_v8b_from_v32_S (k[3])) ^ unpack_v8b_from_v32_S (k[2])) ^ unpack_v8b_from_v32_S (k[1])) ^ unpack_v8b_from_v32_S (k[0]) #define q42(x,k) q (1, q (0, q (0, q (0, x) ^ unpack_v8c_from_v32_S (k[3])) ^ unpack_v8c_from_v32_S (k[2])) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) @@ -361,6 +421,8 @@ DECLSPEC u32 mds_rem (u32 p0, u32 p1) return p1; } +// 128 bit key + DECLSPEC u32 h_fun128 (const u32 x, const u32 *key) { u32 b0, b1, b2, b3; @@ -459,19 +521,115 @@ DECLSPEC void twofish128_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u out[3] = data[1] ^ lk[3]; } -// 256 bit key +// 192 bit key -#define g1_fun256(x) \ - (mds (0, q40 (unpack_v8d_from_v32_S (x), sk)) ^ \ - mds (1, q41 (unpack_v8a_from_v32_S (x), sk)) ^ \ - mds (2, q42 (unpack_v8b_from_v32_S (x), sk)) ^ \ - mds (3, q43 (unpack_v8c_from_v32_S (x), sk))) +DECLSPEC u32 h_fun192 (const u32 x, const u32 *key) +{ + u32 b0, b1, b2, b3; -#define g0_fun256(x) \ - (mds (0, q40 (unpack_v8a_from_v32_S (x), sk)) ^ \ - mds (1, q41 (unpack_v8b_from_v32_S (x), sk)) ^ \ - mds (2, q42 (unpack_v8c_from_v32_S (x), sk)) ^ \ - mds (3, q43 (unpack_v8d_from_v32_S (x), sk))) + b0 = unpack_v8a_from_v32_S (x); + b1 = unpack_v8b_from_v32_S (x); + b2 = unpack_v8c_from_v32_S (x); + b3 = unpack_v8d_from_v32_S (x); + + b0 = q (1, b0) ^ unpack_v8a_from_v32_S (key[2]); + b1 = q (1, b1) ^ unpack_v8b_from_v32_S (key[2]); + b2 = q (0, b2) ^ unpack_v8c_from_v32_S (key[2]); + b3 = q (0, b3) ^ unpack_v8d_from_v32_S (key[2]); + + b0 = q (0, (q (0, b0) ^ unpack_v8a_from_v32_S (key[1]))) ^ unpack_v8a_from_v32_S (key[0]); + b1 = q (0, (q (1, b1) ^ unpack_v8b_from_v32_S (key[1]))) ^ unpack_v8b_from_v32_S (key[0]); + b2 = q (1, (q (0, b2) ^ unpack_v8c_from_v32_S (key[1]))) ^ unpack_v8c_from_v32_S (key[0]); + b3 = q (1, (q (1, b3) ^ unpack_v8d_from_v32_S (key[1]))) ^ unpack_v8d_from_v32_S (key[0]); + + return mds (0, b0) ^ mds (1, b1) ^ mds (2, b2) ^ mds (3, b3); +} + +DECLSPEC void twofish192_set_key (u32 *sk, u32 *lk, const u32 *ukey) +{ + u32 me_key[3]; + + me_key[0] = ukey[0]; + me_key[1] = ukey[2]; + me_key[2] = ukey[4]; + + u32 mo_key[3]; + + mo_key[0] = ukey[1]; + mo_key[1] = ukey[3]; + mo_key[2] = ukey[5]; + + sk[2] = mds_rem (me_key[0], mo_key[0]); + sk[1] = mds_rem (me_key[1], mo_key[1]); + sk[0] = mds_rem (me_key[2], mo_key[2]); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 40; i += 2) + { + u32 a = 0x01010101 * i; + u32 b = 0x01010101 + a; + + a = h_fun192 (a, me_key); + b = h_fun192 (b, mo_key); + + b = hc_rotl32_S (b, 8); + + lk[i + 0] = a + b; + lk[i + 1] = hc_rotl32_S (a + 2 * b, 9); + } +} + +DECLSPEC void twofish192_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) +{ + u32 data[4]; + + data[0] = in[0] ^ lk[0]; + data[1] = in[1] ^ lk[1]; + data[2] = in[2] ^ lk[2]; + data[3] = in[3] ^ lk[3]; + + f_rnd192 (0); + f_rnd192 (1); + f_rnd192 (2); + f_rnd192 (3); + f_rnd192 (4); + f_rnd192 (5); + f_rnd192 (6); + f_rnd192 (7); + + out[0] = data[2] ^ lk[4]; + out[1] = data[3] ^ lk[5]; + out[2] = data[0] ^ lk[6]; + out[3] = data[1] ^ lk[7]; +} + +DECLSPEC void twofish192_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) +{ + u32 data[4]; + + data[0] = in[0] ^ lk[4]; + data[1] = in[1] ^ lk[5]; + data[2] = in[2] ^ lk[6]; + data[3] = in[3] ^ lk[7]; + + i_rnd192 (7); + i_rnd192 (6); + i_rnd192 (5); + i_rnd192 (4); + i_rnd192 (3); + i_rnd192 (2); + i_rnd192 (1); + i_rnd192 (0); + + out[0] = data[2] ^ lk[0]; + out[1] = data[3] ^ lk[1]; + out[2] = data[0] ^ lk[2]; + out[3] = data[1] ^ lk[3]; +} + +// 256 bit key DECLSPEC u32 h_fun256 (const u32 x, const u32 *key) { @@ -589,8 +747,15 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u #undef g1_fun128 #undef g0_fun128 +#undef g1_fun192 +#undef g0_fun192 +#undef g1_fun256 +#undef g0_fun256 + #undef f_rnd128 #undef i_rnd128 +#undef f_rnd192 +#undef i_rnd192 #undef f_rnd256 #undef i_rnd256 @@ -602,6 +767,12 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u #undef q21 #undef q22 #undef q23 + +#undef q30 +#undef q31 +#undef q32 +#undef q33 + #undef q40 #undef q41 #undef q42 diff --git a/OpenCL/inc_cipher_twofish.h b/OpenCL/inc_cipher_twofish.h index 9fc79e901..95b655353 100644 --- a/OpenCL/inc_cipher_twofish.h +++ b/OpenCL/inc_cipher_twofish.h @@ -11,6 +11,10 @@ DECLSPEC u32 h_fun128 (const u32 x, const u32 *key); DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey); DECLSPEC void twofish128_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); DECLSPEC void twofish128_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); +DECLSPEC u32 h_fun192 (const u32 x, const u32 *key); +DECLSPEC void twofish192_set_key (u32 *sk, u32 *lk, const u32 *ukey); +DECLSPEC void twofish192_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); +DECLSPEC void twofish192_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); DECLSPEC u32 h_fun256 (const u32 x, const u32 *key); DECLSPEC void twofish256_set_key (u32 *sk, u32 *lk, const u32 *ukey); DECLSPEC void twofish256_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index ba6edcc56..e55fabbb0 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1431,13 +1431,12 @@ DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1446,13 +1445,12 @@ DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1461,13 +1459,12 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1476,13 +1473,12 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1845,13 +1841,12 @@ DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1860,13 +1855,12 @@ DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1875,13 +1869,12 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1890,13 +1883,12 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1981,6 +1973,372 @@ DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS cons } #endif +// Input has to be zero padded and buffer size has to be multiple of 4 and at least of length 24 +// We simply ignore buffer length for the first 24 bytes for some extra speed boost :) +// Number of unrolls found by simply testing what gave best results + +DECLSPEC int hc_enc_scan (const u32 *buf, const int len) +{ + if (buf[0] & 0x80808080) return 1; + if (buf[1] & 0x80808080) return 1; + if (buf[2] & 0x80808080) return 1; + if (buf[3] & 0x80808080) return 1; + if (buf[4] & 0x80808080) return 1; + if (buf[5] & 0x80808080) return 1; + + for (int i = 24, j = 6; i < len; i += 4, j += 1) + { + if (buf[j] & 0x80808080) return 1; + } + + return 0; +} + +DECLSPEC int hc_enc_scan_global (GLOBAL_AS const u32 *buf, const int len) +{ + if (buf[0] & 0x80808080) return 1; + if (buf[1] & 0x80808080) return 1; + if (buf[2] & 0x80808080) return 1; + if (buf[3] & 0x80808080) return 1; + if (buf[4] & 0x80808080) return 1; + if (buf[5] & 0x80808080) return 1; + + for (int i = 24, j = 6; i < len; i += 4, j += 1) + { + if (buf[j] & 0x80808080) return 1; + } + + return 0; +} + +// Constants and some code snippets from unicode.org's ConvertUTF.c +// Compiler can perfectly translate some of the branches and switch cases this into MOVC +// which is faster than lookup tables + +#define halfShift 10 + +#define halfBase 0x0010000 +#define halfMask 0x3FF + +#define UNI_MAX_BMP 0xFFFF +#define UNI_SUR_HIGH_START 0xD800 +#define UNI_SUR_HIGH_END 0xDBFF +#define UNI_SUR_LOW_START 0xDC00 +#define UNI_SUR_LOW_END 0xDFFF + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ + +#define offsetsFromUTF8_0 0x00000000UL +#define offsetsFromUTF8_1 0x00003080UL +#define offsetsFromUTF8_2 0x000E2080UL +#define offsetsFromUTF8_3 0x03C82080UL +#define offsetsFromUTF8_4 0xFA082080UL +#define offsetsFromUTF8_5 0x82082080UL + +DECLSPEC void hc_enc_init (hc_enc_t *hc_enc) +{ + hc_enc->pos = 0; + + hc_enc->cbuf = 0; + hc_enc->clen = 0; +} + +DECLSPEC int hc_enc_has_next (hc_enc_t *hc_enc, const int sz) +{ + if (hc_enc->pos < sz) return 1; + + if (hc_enc->clen) return 1; + + return 0; +} + +// Input buffer and Output buffer size has to be multiple of 4 and at least of size 4. +// The output buffer is not zero padded, so entire buffer has to be set all zero before entering this function or truncated afterwards. + +DECLSPEC int hc_enc_next (hc_enc_t *hc_enc, const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz) +{ + const u8 *src_ptr = (const u8 *) src_buf; + u8 *dst_ptr = ( u8 *) dst_buf; + + int src_pos = hc_enc->pos; + + int dst_pos = hc_enc->clen; + + dst_buf[0] = hc_enc->cbuf; + + hc_enc->clen = 0; + hc_enc->cbuf = 0; + + while ((src_pos < src_len) && (dst_pos < dst_sz)) + { + const u8 c = src_ptr[src_pos]; + + int extraBytesToRead = 0; + + if (c >= 0xfc) + { + extraBytesToRead = 5; + } + else if (c >= 0xf8) + { + extraBytesToRead = 4; + } + else if (c >= 0xf0) + { + extraBytesToRead = 3; + } + else if (c >= 0xe0) + { + extraBytesToRead = 2; + } + else if (c >= 0xc0) + { + extraBytesToRead = 1; + } + + if ((src_pos + extraBytesToRead) >= src_sz) + { + // broken input + + hc_enc->pos = src_len; + + return dst_pos; + } + + u32 ch = 0; + + switch (extraBytesToRead) + { + case 5: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_5; + break; + case 4: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_4; + break; + case 3: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_3; + break; + case 2: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_2; + break; + case 1: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_1; + break; + case 0: + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_0; + break; + } + + /* Target is a character <= 0xFFFF */ + if (ch <= UNI_MAX_BMP) + { + dst_ptr[dst_pos++] = (ch >> 0) & 0xff; + dst_ptr[dst_pos++] = (ch >> 8) & 0xff; + } + else + { + ch -= halfBase; + + const u32 a = ((ch >> halfShift) + UNI_SUR_HIGH_START); + const u32 b = ((ch & halfMask) + UNI_SUR_LOW_START); + + if ((dst_pos + 2) == dst_sz) + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + + hc_enc->cbuf = b & 0xffff; + hc_enc->clen = 2; + } + else + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + dst_ptr[dst_pos++] = (b >> 0) & 0xff; + dst_ptr[dst_pos++] = (b >> 8) & 0xff; + } + } + } + + hc_enc->pos = src_pos; + + return dst_pos; +} + +DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz) +{ + GLOBAL_AS const u8 *src_ptr = (GLOBAL_AS const u8 *) src_buf; + u8 *dst_ptr = ( u8 *) dst_buf; + + int src_pos = hc_enc->pos; + + int dst_pos = hc_enc->clen; + + dst_buf[0] = hc_enc->cbuf; + + hc_enc->clen = 0; + hc_enc->cbuf = 0; + + while ((src_pos < src_len) && (dst_pos < dst_sz)) + { + const u8 c = src_ptr[src_pos]; + + int extraBytesToRead = 0; + + if (c >= 0xfc) + { + extraBytesToRead = 5; + } + else if (c >= 0xf8) + { + extraBytesToRead = 4; + } + else if (c >= 0xf0) + { + extraBytesToRead = 3; + } + else if (c >= 0xe0) + { + extraBytesToRead = 2; + } + else if (c >= 0xc0) + { + extraBytesToRead = 1; + } + + if ((src_pos + extraBytesToRead) >= src_sz) + { + // broken input + + hc_enc->pos = src_len; + + return dst_pos; + } + + u32 ch = 0; + + switch (extraBytesToRead) + { + case 5: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_5; + break; + case 4: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_4; + break; + case 3: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_3; + break; + case 2: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_2; + break; + case 1: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_1; + break; + case 0: + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_0; + break; + } + + /* Target is a character <= 0xFFFF */ + if (ch <= UNI_MAX_BMP) + { + dst_ptr[dst_pos++] = (ch >> 0) & 0xff; + dst_ptr[dst_pos++] = (ch >> 8) & 0xff; + } + else + { + ch -= halfBase; + + const u32 a = ((ch >> halfShift) + UNI_SUR_HIGH_START); + const u32 b = ((ch & halfMask) + UNI_SUR_LOW_START); + + if ((dst_pos + 2) == dst_sz) + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + + hc_enc->cbuf = b & 0xffff; + hc_enc->clen = 2; + } + else + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + dst_ptr[dst_pos++] = (b >> 0) & 0xff; + dst_ptr[dst_pos++] = (b >> 8) & 0xff; + } + } + } + + hc_enc->pos = src_pos; + + return dst_pos; +} + +#undef halfShift + +#undef halfBase +#undef halfMask + +#undef UNI_MAX_BMP +#undef UNI_SUR_HIGH_START +#undef UNI_SUR_HIGH_END +#undef UNI_SUR_LOW_START +#undef UNI_SUR_LOW_END + +#undef offsetsFromUTF8_0 +#undef offsetsFromUTF8_1 +#undef offsetsFromUTF8_2 +#undef offsetsFromUTF8_3 +#undef offsetsFromUTF8_4 +#undef offsetsFromUTF8_5 + DECLSPEC int pkcs_padding_bs8 (const u32 *data_buf, const int data_len) { if (data_len == 0) return -1; // cannot have zero length, is important to avoid out of boundary reads @@ -2146,17 +2504,17 @@ DECLSPEC u32 check (const u32 *digest, GLOBAL_AS const u32 *bitmap_s1_a, GLOBAL_ DECLSPEC void mark_hash (GLOBAL_AS plain_t *plains_buf, GLOBAL_AS u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u64 gid, const u32 il_pos, const u32 extra1, const u32 extra2) { - const u32 idx = atomic_inc (d_result); + const u32 idx = hc_atomic_inc (d_result); #if ATTACK_MODE == 9 #else if (idx >= digests_cnt) { - // this is kind of tricky: we *must* call atomic_inc() to know about the current value from a multi-thread perspective + // this is kind of tricky: we *must* call hc_atomic_inc() to know about the current value from a multi-thread perspective // this action creates a buffer overflow, so we need to fix it here - atomic_dec (d_result); + hc_atomic_dec (d_result); return; } diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index ebdbe4675..88e49ff5e 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -62,6 +62,7 @@ MAYBE_UNUSED const u32 digests_cnt, \ MAYBE_UNUSED const u32 digests_offset_host, \ MAYBE_UNUSED const u32 combs_mode, \ + MAYBE_UNUSED const u32 salt_repeat, \ MAYBE_UNUSED const u64 pws_pos, \ MAYBE_UNUSED const u64 gid_max #else @@ -100,6 +101,7 @@ MAYBE_UNUSED const u32 digests_cnt, \ MAYBE_UNUSED const u32 digests_offset_host, \ MAYBE_UNUSED const u32 combs_mode, \ + MAYBE_UNUSED const u32 salt_repeat, \ MAYBE_UNUSED const u64 pws_pos, \ MAYBE_UNUSED const u64 gid_max #endif @@ -234,6 +236,13 @@ DECLSPEC int hash_comp (const u32 *d1, GLOBAL_AS const u32 *d2); DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS const digest_t *digests_buf); #endif +DECLSPEC int hc_enc_scan (const u32 *buf, const int len); +DECLSPEC int hc_enc_scan_global (GLOBAL_AS const u32 *buf, const int len); +DECLSPEC void hc_enc_init (hc_enc_t *hc_enc); +DECLSPEC int hc_enc_has_next (hc_enc_t *hc_enc, const int sz); +DECLSPEC int hc_enc_next (hc_enc_t *hc_enc, const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz); +DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz); + DECLSPEC int pkcs_padding_bs8 (const u32 *data_buf, const int data_len); DECLSPEC int pkcs_padding_bs16 (const u32 *data_buf, const int data_len); DECLSPEC int asn1_detect (const u32 *buf, const int len); diff --git a/OpenCL/inc_comp_multi.cl b/OpenCL/inc_comp_multi.cl index f6cb0a08e..c402f37a5 100644 --- a/OpenCL/inc_comp_multi.cl +++ b/OpenCL/inc_comp_multi.cl @@ -24,7 +24,7 @@ if (check (digest_tp, { const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } diff --git a/OpenCL/inc_comp_multi_bs.cl b/OpenCL/inc_comp_multi_bs.cl index bc189a786..8ba742b26 100644 --- a/OpenCL/inc_comp_multi_bs.cl +++ b/OpenCL/inc_comp_multi_bs.cl @@ -26,7 +26,7 @@ if (check (digest_tp, { const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + slice, 0, 0); } diff --git a/OpenCL/inc_comp_single.cl b/OpenCL/inc_comp_single.cl index d78414ac6..a5fb8dd2e 100644 --- a/OpenCL/inc_comp_single.cl +++ b/OpenCL/inc_comp_single.cl @@ -5,7 +5,7 @@ if ((r0 == search[0]) { const u32 final_hash_pos = DIGESTS_OFFSET + 0; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); } diff --git a/OpenCL/inc_comp_single_bs.cl b/OpenCL/inc_comp_single_bs.cl index 0ef2989aa..e841ce005 100644 --- a/OpenCL/inc_comp_single_bs.cl +++ b/OpenCL/inc_comp_single_bs.cl @@ -3,7 +3,7 @@ if ((il_pos + slice) < il_cnt) { const u32 final_hash_pos = DIGESTS_OFFSET + 0; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + slice, 0, 0); } diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index ff877ca11..bd583b976 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -1730,14 +1730,16 @@ DECLSPEC void point_get_coords (secp256k1_t *r, const u32 *x, const u32 *y) r->xy[95] = neg[7]; } -DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +/* + * Convert the tweak/scalar k to w-NAF (window size is 4). + * @param naf out: w-NAF form of the tweak/scalar, a pointer to an u32 array with a size of 33. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @return Returns the loop start index. + */ +DECLSPEC int convert_to_window_naf (u32 *naf, const u32 *k) { - /* - * Convert the tweak/scalar k to w-NAF (window size is 4) - */ - + int loop_start = 0; u32 n[9]; - n[0] = 0; // we need this extra slot sometimes for the subtraction to work n[1] = k[7]; n[2] = k[6]; @@ -1748,10 +1750,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps n[7] = k[1]; n[8] = k[0]; - u32 naf[32 + 1] = { 0 }; // we need one extra slot - - int loop_start = 0; - for (int i = 0; i <= 256; i++) { if (n[8] & 1) @@ -1835,8 +1833,21 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps n[1] = n[1] >> 1 | n[0] << 31; n[0] = n[0] >> 1; } + return loop_start; +} - +/* + * @param x1 out: x coordinate, a pointer to an u32 array with a size of 8. + * @param y1 out: y coordinate, a pointer to an u32 array with a size of 8. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @param tmps in: a basepoint for the multiplication. + * @return Returns the x coordinate with a leading parity/sign (for odd/even y), it is named a compressed coordinate. + */ +DECLSPEC void point_mul_xy (u32 *x1, u32 *y1, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +{ + u32 naf[SECP256K1_NAF_SIZE] = { 0 }; + int loop_start = convert_to_window_naf(naf, k); + // first set: const u32 multiplier = (naf[loop_start >> 3] >> ((loop_start & 7) << 2)) & 0x0f; // or use u8 ? @@ -1846,7 +1857,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps const u32 x_pos = ((multiplier - 1 + odd) >> 1) * 24; const u32 y_pos = odd ? (x_pos + 8) : (x_pos + 16); - u32 x1[8]; x1[0] = tmps->xy[x_pos + 0]; x1[1] = tmps->xy[x_pos + 1]; @@ -1857,8 +1867,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps x1[6] = tmps->xy[x_pos + 6]; x1[7] = tmps->xy[x_pos + 7]; - u32 y1[8]; - y1[0] = tmps->xy[y_pos + 0]; y1[1] = tmps->xy[y_pos + 1]; y1[2] = tmps->xy[y_pos + 2]; @@ -1965,6 +1973,21 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps mul_mod (z1, z2, z1); // z1^3 mul_mod (y1, y1, z1); // y1_affine + + // return values are already in x1 and y1 +} + +/* + * @param r out: x coordinate with leading parity/sign (for odd/even y), a pointer to an u32 array with a size of 9. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @param tmps in: a basepoint for the multiplication. + * @return Returns the x coordinate with a leading parity/sign (for odd/even y), it is named a compressed coordinate. + */ +DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +{ + u32 x[8]; + u32 y[8]; + point_mul_xy(x, y, k, tmps); /* * output: @@ -1972,45 +1995,30 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps // shift by 1 byte (8 bits) to make room and add the parity/sign (for odd/even y): - r[8] = (x1[0] << 24); - r[7] = (x1[0] >> 8) | (x1[1] << 24); - r[6] = (x1[1] >> 8) | (x1[2] << 24); - r[5] = (x1[2] >> 8) | (x1[3] << 24); - r[4] = (x1[3] >> 8) | (x1[4] << 24); - r[3] = (x1[4] >> 8) | (x1[5] << 24); - r[2] = (x1[5] >> 8) | (x1[6] << 24); - r[1] = (x1[6] >> 8) | (x1[7] << 24); - r[0] = (x1[7] >> 8); + r[8] = (x[0] << 24); + r[7] = (x[0] >> 8) | (x[1] << 24); + r[6] = (x[1] >> 8) | (x[2] << 24); + r[5] = (x[2] >> 8) | (x[3] << 24); + r[4] = (x[3] >> 8) | (x[4] << 24); + r[3] = (x[4] >> 8) | (x[5] << 24); + r[2] = (x[5] >> 8) | (x[6] << 24); + r[1] = (x[6] >> 8) | (x[7] << 24); + r[0] = (x[7] >> 8); - const u32 type = 0x02 | (y1[0] & 1); // (note: 0b10 | 0b01 = 0x03) + const u32 type = 0x02 | (y[0] & 1); // (note: 0b10 | 0b01 = 0x03) r[0] = r[0] | type << 24; // 0x02 or 0x03 } -DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) +/* + * Transform a x coordinate and separate parity to secp256k1_t. + * @param r out: x and y coordinates. + * @param x in: x coordinate which should be converted, a pointer to an u32 array with a size of 8. + * @param first_byte in: The parity of the y coordinate, a u32. + * @return Returns 0 if successfull, returns 1 if x is greater than the basepoint. + */ +DECLSPEC u32 transform_public (secp256k1_t *r, const u32 *x, const u32 first_byte) { - // verify: - - const u32 first_byte = k[0] & 0xff; - - if ((first_byte != '\x02') && (first_byte != '\x03')) - { - return 1; - } - - // load k into x without the first byte: - - u32 x[8]; - - x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff); - x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff); - x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff); - x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff); - x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff); - x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff); - x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff); - x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff); - u32 p[8]; p[0] = SECP256K1_P0; @@ -2062,3 +2070,163 @@ DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) return 0; } + +/* + * Parse a x coordinate with leading parity to secp256k1_t. + * @param r out: x and y coordinates. + * @param k in: x coordinate which should be converted with leading parity, a pointer to an u32 array with a size of 9. + * @return Returns 0 if successfull, returns 1 if x is greater than the basepoint or the parity has an unexpected value. + */ +DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) +{ + // verify: + + const u32 first_byte = k[0] & 0xff; + + if ((first_byte != '\x02') && (first_byte != '\x03')) + { + return 1; + } + + // load k into x without the first byte: + + u32 x[8]; + + x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff); + x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff); + x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff); + x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff); + x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff); + x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff); + x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff); + x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff); + + return transform_public(r, x, first_byte); +} + + +/* + * Set precomputed values of the basepoint g to a secp256k1 structure. + * @param r out: x and y coordinates. pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) + */ +DECLSPEC void set_precomputed_basepoint_g (secp256k1_t *r) { + // x1 + r->xy[ 0] = SECP256K1_G_PRE_COMPUTED_00; + r->xy[ 1] = SECP256K1_G_PRE_COMPUTED_01; + r->xy[ 2] = SECP256K1_G_PRE_COMPUTED_02; + r->xy[ 3] = SECP256K1_G_PRE_COMPUTED_03; + r->xy[ 4] = SECP256K1_G_PRE_COMPUTED_04; + r->xy[ 5] = SECP256K1_G_PRE_COMPUTED_05; + r->xy[ 6] = SECP256K1_G_PRE_COMPUTED_06; + r->xy[ 7] = SECP256K1_G_PRE_COMPUTED_07; + + // y1 + r->xy[ 8] = SECP256K1_G_PRE_COMPUTED_08; + r->xy[ 9] = SECP256K1_G_PRE_COMPUTED_09; + r->xy[10] = SECP256K1_G_PRE_COMPUTED_10; + r->xy[11] = SECP256K1_G_PRE_COMPUTED_11; + r->xy[12] = SECP256K1_G_PRE_COMPUTED_12; + r->xy[13] = SECP256K1_G_PRE_COMPUTED_13; + r->xy[14] = SECP256K1_G_PRE_COMPUTED_14; + r->xy[15] = SECP256K1_G_PRE_COMPUTED_15; + + // -y1 + r->xy[16] = SECP256K1_G_PRE_COMPUTED_16; + r->xy[17] = SECP256K1_G_PRE_COMPUTED_17; + r->xy[18] = SECP256K1_G_PRE_COMPUTED_18; + r->xy[19] = SECP256K1_G_PRE_COMPUTED_19; + r->xy[20] = SECP256K1_G_PRE_COMPUTED_20; + r->xy[21] = SECP256K1_G_PRE_COMPUTED_21; + r->xy[22] = SECP256K1_G_PRE_COMPUTED_22; + r->xy[23] = SECP256K1_G_PRE_COMPUTED_23; + + // x3 + r->xy[24] = SECP256K1_G_PRE_COMPUTED_24; + r->xy[25] = SECP256K1_G_PRE_COMPUTED_25; + r->xy[26] = SECP256K1_G_PRE_COMPUTED_26; + r->xy[27] = SECP256K1_G_PRE_COMPUTED_27; + r->xy[28] = SECP256K1_G_PRE_COMPUTED_28; + r->xy[29] = SECP256K1_G_PRE_COMPUTED_29; + r->xy[30] = SECP256K1_G_PRE_COMPUTED_30; + r->xy[31] = SECP256K1_G_PRE_COMPUTED_31; + + // y3 + r->xy[32] = SECP256K1_G_PRE_COMPUTED_32; + r->xy[33] = SECP256K1_G_PRE_COMPUTED_33; + r->xy[34] = SECP256K1_G_PRE_COMPUTED_34; + r->xy[35] = SECP256K1_G_PRE_COMPUTED_35; + r->xy[36] = SECP256K1_G_PRE_COMPUTED_36; + r->xy[37] = SECP256K1_G_PRE_COMPUTED_37; + r->xy[38] = SECP256K1_G_PRE_COMPUTED_38; + r->xy[39] = SECP256K1_G_PRE_COMPUTED_39; + + // -y3 + r->xy[40] = SECP256K1_G_PRE_COMPUTED_40; + r->xy[41] = SECP256K1_G_PRE_COMPUTED_41; + r->xy[42] = SECP256K1_G_PRE_COMPUTED_42; + r->xy[43] = SECP256K1_G_PRE_COMPUTED_43; + r->xy[44] = SECP256K1_G_PRE_COMPUTED_44; + r->xy[45] = SECP256K1_G_PRE_COMPUTED_45; + r->xy[46] = SECP256K1_G_PRE_COMPUTED_46; + r->xy[47] = SECP256K1_G_PRE_COMPUTED_47; + + // x5 + r->xy[48] = SECP256K1_G_PRE_COMPUTED_48; + r->xy[49] = SECP256K1_G_PRE_COMPUTED_49; + r->xy[50] = SECP256K1_G_PRE_COMPUTED_50; + r->xy[51] = SECP256K1_G_PRE_COMPUTED_51; + r->xy[52] = SECP256K1_G_PRE_COMPUTED_52; + r->xy[53] = SECP256K1_G_PRE_COMPUTED_53; + r->xy[54] = SECP256K1_G_PRE_COMPUTED_54; + r->xy[55] = SECP256K1_G_PRE_COMPUTED_55; + + // y5 + r->xy[56] = SECP256K1_G_PRE_COMPUTED_56; + r->xy[57] = SECP256K1_G_PRE_COMPUTED_57; + r->xy[58] = SECP256K1_G_PRE_COMPUTED_58; + r->xy[59] = SECP256K1_G_PRE_COMPUTED_59; + r->xy[60] = SECP256K1_G_PRE_COMPUTED_60; + r->xy[61] = SECP256K1_G_PRE_COMPUTED_61; + r->xy[62] = SECP256K1_G_PRE_COMPUTED_62; + r->xy[63] = SECP256K1_G_PRE_COMPUTED_63; + + // -y5 + r->xy[64] = SECP256K1_G_PRE_COMPUTED_64; + r->xy[65] = SECP256K1_G_PRE_COMPUTED_65; + r->xy[66] = SECP256K1_G_PRE_COMPUTED_66; + r->xy[67] = SECP256K1_G_PRE_COMPUTED_67; + r->xy[68] = SECP256K1_G_PRE_COMPUTED_68; + r->xy[69] = SECP256K1_G_PRE_COMPUTED_69; + r->xy[70] = SECP256K1_G_PRE_COMPUTED_70; + r->xy[71] = SECP256K1_G_PRE_COMPUTED_71; + + // x7 + r->xy[72] = SECP256K1_G_PRE_COMPUTED_72; + r->xy[73] = SECP256K1_G_PRE_COMPUTED_73; + r->xy[74] = SECP256K1_G_PRE_COMPUTED_74; + r->xy[75] = SECP256K1_G_PRE_COMPUTED_75; + r->xy[76] = SECP256K1_G_PRE_COMPUTED_76; + r->xy[77] = SECP256K1_G_PRE_COMPUTED_77; + r->xy[78] = SECP256K1_G_PRE_COMPUTED_78; + r->xy[79] = SECP256K1_G_PRE_COMPUTED_79; + + // y7 + r->xy[80] = SECP256K1_G_PRE_COMPUTED_80; + r->xy[81] = SECP256K1_G_PRE_COMPUTED_81; + r->xy[82] = SECP256K1_G_PRE_COMPUTED_82; + r->xy[83] = SECP256K1_G_PRE_COMPUTED_83; + r->xy[84] = SECP256K1_G_PRE_COMPUTED_84; + r->xy[85] = SECP256K1_G_PRE_COMPUTED_85; + r->xy[86] = SECP256K1_G_PRE_COMPUTED_86; + r->xy[87] = SECP256K1_G_PRE_COMPUTED_87; + + // -y7 + r->xy[88] = SECP256K1_G_PRE_COMPUTED_88; + r->xy[89] = SECP256K1_G_PRE_COMPUTED_89; + r->xy[90] = SECP256K1_G_PRE_COMPUTED_90; + r->xy[91] = SECP256K1_G_PRE_COMPUTED_91; + r->xy[92] = SECP256K1_G_PRE_COMPUTED_92; + r->xy[93] = SECP256K1_G_PRE_COMPUTED_93; + r->xy[94] = SECP256K1_G_PRE_COMPUTED_94; + r->xy[95] = SECP256K1_G_PRE_COMPUTED_95; +} diff --git a/OpenCL/inc_ecc_secp256k1.h b/OpenCL/inc_ecc_secp256k1.h index 9a8e069d2..4a45d8b8c 100644 --- a/OpenCL/inc_ecc_secp256k1.h +++ b/OpenCL/inc_ecc_secp256k1.h @@ -10,6 +10,8 @@ #define SECP256K1_B 7 +// finite field Fp +// p = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F #define SECP256K1_P0 0xfffffc2f #define SECP256K1_P1 0xfffffffe #define SECP256K1_P2 0xffffffff @@ -19,6 +21,8 @@ #define SECP256K1_P6 0xffffffff #define SECP256K1_P7 0xffffffff +// prime order N +// n = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE BAAEDCE6 AF48A03B BFD25E8C D0364141 #define SECP256K1_N0 0xd0364141 #define SECP256K1_N1 0xbfd25e8c #define SECP256K1_N2 0xaf48a03b @@ -28,14 +32,194 @@ #define SECP256K1_N6 0xffffffff #define SECP256K1_N7 0xffffffff +// the base point G in compressed form for transform_public +// G = 02 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +#define SECP256K1_G_PARITY 0x00000002 +#define SECP256K1_G0 0x16f81798 +#define SECP256K1_G1 0x59f2815b +#define SECP256K1_G2 0x2dce28d9 +#define SECP256K1_G3 0x029bfcdb +#define SECP256K1_G4 0xce870b07 +#define SECP256K1_G5 0x55a06295 +#define SECP256K1_G6 0xf9dcbbac +#define SECP256K1_G7 0x79be667e + +// the base point G in compressed form for parse_public +// parity and reversed byte/char (8 bit) byte order +// G = 02 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +#define SECP256K1_G_STRING0 0x66be7902 +#define SECP256K1_G_STRING1 0xbbdcf97e +#define SECP256K1_G_STRING2 0x62a055ac +#define SECP256K1_G_STRING3 0x0b87ce95 +#define SECP256K1_G_STRING4 0xfc9b0207 +#define SECP256K1_G_STRING5 0x28ce2ddb +#define SECP256K1_G_STRING6 0x81f259d9 +#define SECP256K1_G_STRING7 0x17f8165b +#define SECP256K1_G_STRING8 0x00000098 + +// pre computed values, can be verified using private keys for +// x1 is the same as the basepoint g +// x1 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU73sVHnoWn +// x3 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU74sHUHy8S +// x5 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU75s2EPgZf +// x7 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU76rnZwVdz + +// x1: 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +// x1: 79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798 +#define SECP256K1_G_PRE_COMPUTED_00 0x16f81798 +#define SECP256K1_G_PRE_COMPUTED_01 0x59f2815b +#define SECP256K1_G_PRE_COMPUTED_02 0x2dce28d9 +#define SECP256K1_G_PRE_COMPUTED_03 0x029bfcdb +#define SECP256K1_G_PRE_COMPUTED_04 0xce870b07 +#define SECP256K1_G_PRE_COMPUTED_05 0x55a06295 +#define SECP256K1_G_PRE_COMPUTED_06 0xf9dcbbac +#define SECP256K1_G_PRE_COMPUTED_07 0x79be667e + +// y1: 483ADA77 26A3C465 5DA4FBFC 0E1108A8 FD17B448 A6855419 9C47D08F FB10D4B8 +// y1: 483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8 +#define SECP256K1_G_PRE_COMPUTED_08 0xfb10d4b8 +#define SECP256K1_G_PRE_COMPUTED_09 0x9c47d08f +#define SECP256K1_G_PRE_COMPUTED_10 0xa6855419 +#define SECP256K1_G_PRE_COMPUTED_11 0xfd17b448 +#define SECP256K1_G_PRE_COMPUTED_12 0x0e1108a8 +#define SECP256K1_G_PRE_COMPUTED_13 0x5da4fbfc +#define SECP256K1_G_PRE_COMPUTED_14 0x26a3c465 +#define SECP256K1_G_PRE_COMPUTED_15 0x483ada77 + +// -y1: B7C52588 D95C3B9A A25B0403 F1EEF757 02E84BB7 597AABE6 63B82F6F 04EF2777 +// -y1: B7C52588D95C3B9AA25B0403F1EEF75702E84BB7597AABE663B82F6F04EF2777 +#define SECP256K1_G_PRE_COMPUTED_16 0x04ef2777 +#define SECP256K1_G_PRE_COMPUTED_17 0x63b82f6f +#define SECP256K1_G_PRE_COMPUTED_18 0x597aabe6 +#define SECP256K1_G_PRE_COMPUTED_19 0x02e84bb7 +#define SECP256K1_G_PRE_COMPUTED_20 0xf1eef757 +#define SECP256K1_G_PRE_COMPUTED_21 0xa25b0403 +#define SECP256K1_G_PRE_COMPUTED_22 0xd95c3b9a +#define SECP256K1_G_PRE_COMPUTED_23 0xb7c52588 + +// x3: F9308A01 9258C310 49344F85 F89D5229 B531C845 836F99B0 8601F113 BCE036F9 +// x3: F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9 +#define SECP256K1_G_PRE_COMPUTED_24 0xbce036f9 +#define SECP256K1_G_PRE_COMPUTED_25 0x8601f113 +#define SECP256K1_G_PRE_COMPUTED_26 0x836f99b0 +#define SECP256K1_G_PRE_COMPUTED_27 0xb531c845 +#define SECP256K1_G_PRE_COMPUTED_28 0xf89d5229 +#define SECP256K1_G_PRE_COMPUTED_29 0x49344f85 +#define SECP256K1_G_PRE_COMPUTED_30 0x9258c310 +#define SECP256K1_G_PRE_COMPUTED_31 0xf9308a01 + +// y3: 388F7B0F 632DE814 0FE337E6 2A37F356 6500A999 34C2231B 6CB9FD75 84B8E672 +// y3: 388F7B0F632DE8140FE337E62A37F3566500A99934C2231B6CB9FD7584B8E672 +#define SECP256K1_G_PRE_COMPUTED_32 0x84b8e672 +#define SECP256K1_G_PRE_COMPUTED_33 0x6cb9fd75 +#define SECP256K1_G_PRE_COMPUTED_34 0x34c2231b +#define SECP256K1_G_PRE_COMPUTED_35 0x6500a999 +#define SECP256K1_G_PRE_COMPUTED_36 0x2a37f356 +#define SECP256K1_G_PRE_COMPUTED_37 0x0fe337e6 +#define SECP256K1_G_PRE_COMPUTED_38 0x632de814 +#define SECP256K1_G_PRE_COMPUTED_39 0x388f7b0f + +// -y3: C77084F0 9CD217EB F01CC819 D5C80CA9 9AFF5666 CB3DDCE4 93460289 7B4715BD +// -y3: C77084F09CD217EBF01CC819D5C80CA99AFF5666CB3DDCE4934602897B4715BD +#define SECP256K1_G_PRE_COMPUTED_40 0x7b4715bd +#define SECP256K1_G_PRE_COMPUTED_41 0x93460289 +#define SECP256K1_G_PRE_COMPUTED_42 0xcb3ddce4 +#define SECP256K1_G_PRE_COMPUTED_43 0x9aff5666 +#define SECP256K1_G_PRE_COMPUTED_44 0xd5c80ca9 +#define SECP256K1_G_PRE_COMPUTED_45 0xf01cc819 +#define SECP256K1_G_PRE_COMPUTED_46 0x9cd217eb +#define SECP256K1_G_PRE_COMPUTED_47 0xc77084f0 + +// x5: 2F8BDE4D 1A072093 55B4A725 0A5C5128 E88B84BD DC619AB7 CBA8D569 B240EFE4 +// x5: 2F8BDE4D1A07209355B4A7250A5C5128E88B84BDDC619AB7CBA8D569B240EFE4 +#define SECP256K1_G_PRE_COMPUTED_48 0xb240efe4 +#define SECP256K1_G_PRE_COMPUTED_49 0xcba8d569 +#define SECP256K1_G_PRE_COMPUTED_50 0xdc619ab7 +#define SECP256K1_G_PRE_COMPUTED_51 0xe88b84bd +#define SECP256K1_G_PRE_COMPUTED_52 0x0a5c5128 +#define SECP256K1_G_PRE_COMPUTED_53 0x55b4a725 +#define SECP256K1_G_PRE_COMPUTED_54 0x1a072093 +#define SECP256K1_G_PRE_COMPUTED_55 0x2f8bde4d + +// y5: D8AC2226 36E5E3D6 D4DBA9DD A6C9C426 F788271B AB0D6840 DCA87D3A A6AC62D6 +// y5: D8AC222636E5E3D6D4DBA9DDA6C9C426F788271BAB0D6840DCA87D3AA6AC62D6 +#define SECP256K1_G_PRE_COMPUTED_56 0xa6ac62d6 +#define SECP256K1_G_PRE_COMPUTED_57 0xdca87d3a +#define SECP256K1_G_PRE_COMPUTED_58 0xab0d6840 +#define SECP256K1_G_PRE_COMPUTED_59 0xf788271b +#define SECP256K1_G_PRE_COMPUTED_60 0xa6c9c426 +#define SECP256K1_G_PRE_COMPUTED_61 0xd4dba9dd +#define SECP256K1_G_PRE_COMPUTED_62 0x36e5e3d6 +#define SECP256K1_G_PRE_COMPUTED_63 0xd8ac2226 + +// -y5: 2753DDD9 C91A1C29 2B245622 59363BD9 0877D8E4 54F297BF 235782C4 59539959 +// -y5: 2753DDD9C91A1C292B24562259363BD90877D8E454F297BF235782C459539959 +#define SECP256K1_G_PRE_COMPUTED_64 0x59539959 +#define SECP256K1_G_PRE_COMPUTED_65 0x235782c4 +#define SECP256K1_G_PRE_COMPUTED_66 0x54f297bf +#define SECP256K1_G_PRE_COMPUTED_67 0x0877d8e4 +#define SECP256K1_G_PRE_COMPUTED_68 0x59363bd9 +#define SECP256K1_G_PRE_COMPUTED_69 0x2b245622 +#define SECP256K1_G_PRE_COMPUTED_70 0xc91a1c29 +#define SECP256K1_G_PRE_COMPUTED_71 0x2753ddd9 + +// x7: 5CBDF064 6E5DB4EA A398F365 F2EA7A0E 3D419B7E 0330E39C E92BDDED CAC4F9BC +// x7: 5CBDF0646E5DB4EAA398F365F2EA7A0E3D419B7E0330E39CE92BDDEDCAC4F9BC +#define SECP256K1_G_PRE_COMPUTED_72 0xcac4f9bc +#define SECP256K1_G_PRE_COMPUTED_73 0xe92bdded +#define SECP256K1_G_PRE_COMPUTED_74 0x0330e39c +#define SECP256K1_G_PRE_COMPUTED_75 0x3d419b7e +#define SECP256K1_G_PRE_COMPUTED_76 0xf2ea7a0e +#define SECP256K1_G_PRE_COMPUTED_77 0xa398f365 +#define SECP256K1_G_PRE_COMPUTED_78 0x6e5db4ea +#define SECP256K1_G_PRE_COMPUTED_79 0x5cbdf064 + +// y7: 6AEBCA40 BA255960 A3178D6D 861A54DB A813D0B8 13FDE7B5 A5082628 087264DA +// y7: 6AEBCA40BA255960A3178D6D861A54DBA813D0B813FDE7B5A5082628087264DA +#define SECP256K1_G_PRE_COMPUTED_80 0x087264da +#define SECP256K1_G_PRE_COMPUTED_81 0xa5082628 +#define SECP256K1_G_PRE_COMPUTED_82 0x13fde7b5 +#define SECP256K1_G_PRE_COMPUTED_83 0xa813d0b8 +#define SECP256K1_G_PRE_COMPUTED_84 0x861a54db +#define SECP256K1_G_PRE_COMPUTED_85 0xa3178d6d +#define SECP256K1_G_PRE_COMPUTED_86 0xba255960 +#define SECP256K1_G_PRE_COMPUTED_87 0x6aebca40 + +// -y7: 951435BF 45DAA69F 5CE87292 79E5AB24 57EC2F47 EC02184A 5AF7D9D6 F78D9755 +// -y7: 951435BF45DAA69F5CE8729279E5AB2457EC2F47EC02184A5AF7D9D6F78D9755 +#define SECP256K1_G_PRE_COMPUTED_88 0xf78d9755 +#define SECP256K1_G_PRE_COMPUTED_89 0x5af7d9d6 +#define SECP256K1_G_PRE_COMPUTED_90 0xec02184a +#define SECP256K1_G_PRE_COMPUTED_91 0x57ec2f47 +#define SECP256K1_G_PRE_COMPUTED_92 0x79e5ab24 +#define SECP256K1_G_PRE_COMPUTED_93 0x5ce87292 +#define SECP256K1_G_PRE_COMPUTED_94 0x45daa69f +#define SECP256K1_G_PRE_COMPUTED_95 0x951435bf + +#define SECP256K1_PRE_COMPUTED_XY_SIZE 96 +#define SECP256K1_NAF_SIZE 33 // 32+1, we need one extra slot + +#define PUBLIC_KEY_LENGTH_WITHOUT_PARITY 8 +#define PUBLIC_KEY_LENGTH_X_Y_WITHOUT_PARITY 16 +// 8+1 to make room for the parity +#define PUBLIC_KEY_LENGTH_WITH_PARITY 9 + +// (32*8 == 256) +#define PRIVATE_KEY_LENGTH 8 + typedef struct secp256k1 { - u32 xy[96]; // pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) + u32 xy[SECP256K1_PRE_COMPUTED_XY_SIZE]; // pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) } secp256k1_t; + +DECLSPEC u32 transform_public (secp256k1_t *r, const u32 *x, const u32 first_byte); DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k); +DECLSPEC void point_mul_xy (u32 *x1, u32 *y1, const u32 *k, GLOBAL_AS const secp256k1_t *tmps); DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps); +DECLSPEC void set_precomputed_basepoint_g (secp256k1_t *r); + #endif // _INC_ECC_SECP256K1_H diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl index ac4377c2f..a90ee9e03 100644 --- a/OpenCL/inc_hash_blake2b.cl +++ b/OpenCL/inc_hash_blake2b.cl @@ -9,7 +9,7 @@ #include "inc_common.h" #include "inc_hash_blake2b.h" -DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0) +DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0) { const u64 t0 = hl32_to_64_S (0, len); @@ -86,9 +86,11 @@ DECLSPEC void blake2b_init (blake2b_ctx_t *ctx) ctx->len = 0; } -DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const u32 len) +DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const u32 pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; if (pos == 0) { @@ -195,7 +197,7 @@ DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, ctx->len += len; } -DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len) +DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const int len) { u32 w0[4]; u32 w1[4]; @@ -285,7 +287,7 @@ DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len) blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - (u32) pos1); } -DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len) +DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { u32 w0[4]; u32 w1[4]; @@ -457,9 +459,11 @@ DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx) ctx->len = 0; } -DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const u32 len) +DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const u32 pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; if (pos == 0) { @@ -566,7 +570,7 @@ DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u3 ctx->len += len; } -DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len) +DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const int len) { u32x w0[4]; u32x w1[4]; diff --git a/OpenCL/inc_hash_blake2b.h b/OpenCL/inc_hash_blake2b.h index 702027ce1..afcacf368 100644 --- a/OpenCL/inc_hash_blake2b.h +++ b/OpenCL/inc_hash_blake2b.h @@ -62,7 +62,7 @@ typedef struct blake2b_ctx u64 m[16]; // buffer u64 h[ 8]; // digest - u32 len; + int len; } blake2b_ctx_t; @@ -71,19 +71,19 @@ typedef struct blake2b_ctx_vector u64x m[16]; // buffer u64x h[ 8]; // digest - u32 len; + int len; } blake2b_ctx_vector_t; -DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0); +DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0); DECLSPEC void blake2b_init (blake2b_ctx_t *ctx); -DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len); -DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len); +DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void blake2b_final (blake2b_ctx_t *ctx); DECLSPEC void blake2b_transform_vector (u64x *h, const u64x *m, const u32x len, const u64 f0); DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx); -DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len); +DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const int len); DECLSPEC void blake2b_final_vector (blake2b_ctx_vector_t *ctx); #endif // _INC_HASH_BLAKE2B_H diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl index eeb28cd17..9da981841 100644 --- a/OpenCL/inc_hash_md4.cl +++ b/OpenCL/inc_hash_md4.cl @@ -107,7 +107,9 @@ DECLSPEC void md4_init (md4_ctx_t *ctx) DECLSPEC void md4_update_64 (md4_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -363,6 +365,24 @@ DECLSPEC void md4_update_swap (md4_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -405,6 +425,41 @@ DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md4_update_utf16le_swap (md4_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -619,6 +674,24 @@ DECLSPEC void md4_update_global_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -661,6 +734,41 @@ DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -737,7 +845,7 @@ DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md4_final (md4_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1246,7 +1354,9 @@ DECLSPEC void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx DECLSPEC void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1620,7 +1730,7 @@ DECLSPEC void md4_update_vector_utf16le_swap (md4_ctx_vector_t *ctx, const u32x DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl index 3c52c1f40..d9e4fcecd 100644 --- a/OpenCL/inc_hash_md5.cl +++ b/OpenCL/inc_hash_md5.cl @@ -143,7 +143,9 @@ DECLSPEC void md5_init (md5_ctx_t *ctx) DECLSPEC void md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -399,6 +401,24 @@ DECLSPEC void md5_update_swap (md5_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md5_update_utf16le (md5_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -441,6 +461,41 @@ DECLSPEC void md5_update_utf16le (md5_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md5_update_utf16le_swap (md5_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -655,6 +710,24 @@ DECLSPEC void md5_update_global_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md5_update_global_utf16le (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -697,6 +770,41 @@ DECLSPEC void md5_update_global_utf16le (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -773,7 +881,7 @@ DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md5_final (md5_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1318,7 +1426,9 @@ DECLSPEC void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx DECLSPEC void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1692,7 +1802,7 @@ DECLSPEC void md5_update_vector_utf16le_swap (md5_ctx_vector_t *ctx, const u32x DECLSPEC void md5_final_vector (md5_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl index bcf1074ac..da0830dc7 100644 --- a/OpenCL/inc_hash_ripemd160.cl +++ b/OpenCL/inc_hash_ripemd160.cl @@ -241,7 +241,9 @@ DECLSPEC void ripemd160_init (ripemd160_ctx_t *ctx) DECLSPEC void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -497,6 +499,24 @@ DECLSPEC void ripemd160_update_swap (ripemd160_ctx_t *ctx, const u32 *w, const i DECLSPEC void ripemd160_update_utf16le (ripemd160_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -539,6 +559,41 @@ DECLSPEC void ripemd160_update_utf16le (ripemd160_ctx_t *ctx, const u32 *w, cons DECLSPEC void ripemd160_update_utf16le_swap (ripemd160_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -753,6 +808,24 @@ DECLSPEC void ripemd160_update_global_swap (ripemd160_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void ripemd160_update_global_utf16le (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -795,6 +868,41 @@ DECLSPEC void ripemd160_update_global_utf16le (ripemd160_ctx_t *ctx, GLOBAL_AS c DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -871,7 +979,7 @@ DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, GLOBAL DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1515,7 +1623,9 @@ DECLSPEC void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ri DECLSPEC void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1889,7 +1999,7 @@ DECLSPEC void ripemd160_update_vector_utf16le_swap (ripemd160_ctx_vector_t *ctx, DECLSPEC void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl index a8f754c1a..0ac702926 100644 --- a/OpenCL/inc_hash_sha1.cl +++ b/OpenCL/inc_hash_sha1.cl @@ -356,7 +356,9 @@ DECLSPEC void sha1_init (sha1_ctx_t *ctx) DECLSPEC void sha1_update_64 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -612,6 +614,24 @@ DECLSPEC void sha1_update_swap (sha1_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void sha1_update_utf16le (sha1_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -654,6 +674,41 @@ DECLSPEC void sha1_update_utf16le (sha1_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void sha1_update_utf16le_swap (sha1_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -986,6 +1041,24 @@ DECLSPEC void sha1_update_global_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void sha1_update_global_utf16le (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1028,6 +1101,41 @@ DECLSPEC void sha1_update_global_utf16le (sha1_ctx_t *ctx, GLOBAL_AS const u32 * DECLSPEC void sha1_update_global_utf16le_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1222,7 +1330,7 @@ DECLSPEC void sha1_update_global_utf16be_swap (sha1_ctx_t *ctx, GLOBAL_AS const DECLSPEC void sha1_final (sha1_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1983,7 +2091,9 @@ DECLSPEC void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t * DECLSPEC void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -2441,7 +2551,7 @@ DECLSPEC void sha1_update_vector_utf16beN (sha1_ctx_vector_t *ctx, const u32x *w DECLSPEC void sha1_final_vector (sha1_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl index 72f3dac99..5733578e4 100644 --- a/OpenCL/inc_hash_sha224.cl +++ b/OpenCL/inc_hash_sha224.cl @@ -158,7 +158,9 @@ DECLSPEC void sha224_init (sha224_ctx_t *ctx) DECLSPEC void sha224_update_64 (sha224_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -414,6 +416,24 @@ DECLSPEC void sha224_update_swap (sha224_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha224_update_utf16le (sha224_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -456,6 +476,41 @@ DECLSPEC void sha224_update_utf16le (sha224_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha224_update_utf16le_swap (sha224_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -670,6 +725,24 @@ DECLSPEC void sha224_update_global_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha224_update_global_utf16le (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -712,6 +785,41 @@ DECLSPEC void sha224_update_global_utf16le (sha224_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -788,7 +896,7 @@ DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha224_final (sha224_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1332,7 +1440,9 @@ DECLSPEC void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_c DECLSPEC void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1748,7 +1858,7 @@ DECLSPEC void sha224_update_vector_utf16beN (sha224_ctx_vector_t *ctx, const u32 DECLSPEC void sha224_final_vector (sha224_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl index 430b0e8b9..6954ce3b1 100644 --- a/OpenCL/inc_hash_sha256.cl +++ b/OpenCL/inc_hash_sha256.cl @@ -158,7 +158,9 @@ DECLSPEC void sha256_init (sha256_ctx_t *ctx) DECLSPEC void sha256_update_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -414,6 +416,24 @@ DECLSPEC void sha256_update_swap (sha256_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha256_update_utf16le (sha256_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -456,6 +476,41 @@ DECLSPEC void sha256_update_utf16le (sha256_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha256_update_utf16le_swap (sha256_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -670,6 +725,24 @@ DECLSPEC void sha256_update_global_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha256_update_global_utf16le (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -712,6 +785,41 @@ DECLSPEC void sha256_update_global_utf16le (sha256_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -788,7 +896,7 @@ DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha256_final (sha256_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1332,7 +1440,9 @@ DECLSPEC void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_c DECLSPEC void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1748,7 +1858,7 @@ DECLSPEC void sha256_update_vector_utf16beN (sha256_ctx_vector_t *ctx, const u32 DECLSPEC void sha256_final_vector (sha256_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl index d63a5ab2b..e1126e532 100644 --- a/OpenCL/inc_hash_sha384.cl +++ b/OpenCL/inc_hash_sha384.cl @@ -178,7 +178,9 @@ DECLSPEC void sha384_init (sha384_ctx_t *ctx) DECLSPEC void sha384_update_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -622,6 +624,24 @@ DECLSPEC void sha384_update_swap (sha384_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha384_update_utf16le (sha384_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -688,6 +708,57 @@ DECLSPEC void sha384_update_utf16le (sha384_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha384_update_utf16le_swap (sha384_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1062,6 +1133,24 @@ DECLSPEC void sha384_update_global_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha384_update_global_utf16le (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1128,6 +1217,57 @@ DECLSPEC void sha384_update_global_utf16le (sha384_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1260,7 +1400,7 @@ DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha384_final (sha384_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -2048,7 +2188,9 @@ DECLSPEC void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_c DECLSPEC void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -2756,7 +2898,7 @@ DECLSPEC void sha384_update_vector_utf16beN (sha384_ctx_vector_t *ctx, const u32 DECLSPEC void sha384_final_vector (sha384_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl index 0f5ca288a..1bc87cb58 100644 --- a/OpenCL/inc_hash_sha512.cl +++ b/OpenCL/inc_hash_sha512.cl @@ -178,7 +178,9 @@ DECLSPEC void sha512_init (sha512_ctx_t *ctx) DECLSPEC void sha512_update_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -622,6 +624,24 @@ DECLSPEC void sha512_update_swap (sha512_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -688,6 +708,57 @@ DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha512_update_utf16le_swap (sha512_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1062,6 +1133,24 @@ DECLSPEC void sha512_update_global_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1128,6 +1217,57 @@ DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1260,7 +1400,7 @@ DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha512_final (sha512_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -1774,6 +1914,105 @@ DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + // forced full decode in one round + + u32 enc_buf[256]; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + if (enc_len > 128) + { + sha512_ctx_t tmp; + + sha512_init (&tmp); + + sha512_update_utf16le_swap (&tmp, enc_buf, enc_len); + + sha512_final (&tmp); + + enc_buf[ 0] = h32_from_64_S (tmp.h[0]); + enc_buf[ 1] = l32_from_64_S (tmp.h[0]); + enc_buf[ 2] = h32_from_64_S (tmp.h[1]); + enc_buf[ 3] = l32_from_64_S (tmp.h[1]); + enc_buf[ 4] = h32_from_64_S (tmp.h[2]); + enc_buf[ 5] = l32_from_64_S (tmp.h[2]); + enc_buf[ 6] = h32_from_64_S (tmp.h[3]); + enc_buf[ 7] = l32_from_64_S (tmp.h[3]); + enc_buf[ 8] = h32_from_64_S (tmp.h[4]); + enc_buf[ 9] = l32_from_64_S (tmp.h[4]); + enc_buf[10] = h32_from_64_S (tmp.h[5]); + enc_buf[11] = l32_from_64_S (tmp.h[5]); + enc_buf[12] = h32_from_64_S (tmp.h[6]); + enc_buf[13] = l32_from_64_S (tmp.h[6]); + enc_buf[14] = h32_from_64_S (tmp.h[7]); + enc_buf[15] = l32_from_64_S (tmp.h[7]); + enc_buf[16] = 0; + enc_buf[17] = 0; + enc_buf[18] = 0; + enc_buf[19] = 0; + enc_buf[20] = 0; + enc_buf[21] = 0; + enc_buf[22] = 0; + enc_buf[23] = 0; + enc_buf[24] = 0; + enc_buf[25] = 0; + enc_buf[26] = 0; + enc_buf[27] = 0; + enc_buf[28] = 0; + enc_buf[29] = 0; + enc_buf[30] = 0; + enc_buf[31] = 0; + } + else + { + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + } + + sha512_hmac_init_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -2165,7 +2404,9 @@ DECLSPEC void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_c DECLSPEC void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -2873,7 +3114,7 @@ DECLSPEC void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32 DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); diff --git a/OpenCL/inc_hash_streebog256.cl b/OpenCL/inc_hash_streebog256.cl index b44349cfa..58d08131d 100644 --- a/OpenCL/inc_hash_streebog256.cl +++ b/OpenCL/inc_hash_streebog256.cl @@ -758,7 +758,9 @@ DECLSPEC void streebog256_transform (streebog256_ctx_t *ctx, const u32 *w0, cons DECLSPEC void streebog256_update_64 (streebog256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1047,7 +1049,7 @@ DECLSPEC void streebog256_update_global_swap (streebog256_ctx_t *ctx, GLOBAL_AS DECLSPEC void streebog256_final (streebog256_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1457,7 +1459,9 @@ DECLSPEC void streebog256_transform_vector (streebog256_ctx_vector_t *ctx, const DECLSPEC void streebog256_update_vector_64 (streebog256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1689,7 +1693,7 @@ DECLSPEC void streebog256_update_vector_swap (streebog256_ctx_vector_t *ctx, con DECLSPEC void streebog256_final_vector (streebog256_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_VV (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_streebog512.cl b/OpenCL/inc_hash_streebog512.cl index 9d1b83ead..45ce8dbb2 100644 --- a/OpenCL/inc_hash_streebog512.cl +++ b/OpenCL/inc_hash_streebog512.cl @@ -758,7 +758,9 @@ DECLSPEC void streebog512_transform (streebog512_ctx_t *ctx, const u32 *w0, cons DECLSPEC void streebog512_update_64 (streebog512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1047,7 +1049,7 @@ DECLSPEC void streebog512_update_global_swap (streebog512_ctx_t *ctx, GLOBAL_AS DECLSPEC void streebog512_final (streebog512_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1476,7 +1478,9 @@ DECLSPEC void streebog512_transform_vector (streebog512_ctx_vector_t *ctx, const DECLSPEC void streebog512_update_vector_64 (streebog512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1708,7 +1712,7 @@ DECLSPEC void streebog512_update_vector_swap (streebog512_ctx_vector_t *ctx, con DECLSPEC void streebog512_final_vector (streebog512_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_VV (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index 2835fc72d..efc518f39 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -762,7 +762,9 @@ DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 *s_MT0, SHM_TYP DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1018,6 +1020,24 @@ DECLSPEC void whirlpool_update_swap (whirlpool_ctx_t *ctx, const u32 *w, const i DECLSPEC void whirlpool_update_utf16le (whirlpool_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1060,6 +1080,41 @@ DECLSPEC void whirlpool_update_utf16le (whirlpool_ctx_t *ctx, const u32 *w, cons DECLSPEC void whirlpool_update_utf16le_swap (whirlpool_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1274,6 +1329,24 @@ DECLSPEC void whirlpool_update_global_swap (whirlpool_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1316,6 +1389,41 @@ DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS c DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1392,7 +1500,7 @@ DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -2052,7 +2160,9 @@ DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, wh DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -2426,7 +2536,7 @@ DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 768de504a..52eaa8121 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -85,17 +85,21 @@ CONSTANT_VK u32 generic_constant[8192]; // 32k #endif -DECLSPEC u32 atomic_dec (u32 *p) +DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p) { - return atomicSub (p, 1); + volatile const u32 val = 1; + + return atomicSub (p, val); } -DECLSPEC u32 atomic_inc (u32 *p) +DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p) { - return atomicAdd (p, 1); + volatile const u32 val = 1; + + return atomicAdd (p, val); } -DECLSPEC u32 atomic_or (u32 *p, u32 val) +DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val) { return atomicOr (p, val); } @@ -161,6 +165,26 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) #endif #ifdef IS_OPENCL + +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomic_sub (p, val); +} + +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomic_add (p, val); +} + +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val) +{ + return atomic_or (p, val); +} + #define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1))) #define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE) #endif diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index fdcf50fc1..c405bed0e 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -6,7 +6,15 @@ #ifndef _INC_PLATFORM_H #define _INC_PLATFORM_H +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + #ifdef IS_AMD +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + DECLSPEC u64x rotl64 (const u64x a, const int n); DECLSPEC u64x rotr64 (const u64x a, const int n); DECLSPEC u64 rotl64_S (const u64 a, const int n); @@ -14,9 +22,10 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n); #endif #ifdef IS_CUDA -DECLSPEC u32 atomic_dec (u32 *p); -DECLSPEC u32 atomic_inc (u32 *p); -DECLSPEC u32 atomic_or (u32 *p, u32 val); +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))); DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))); DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused))); diff --git a/OpenCL/inc_scalar.cl b/OpenCL/inc_scalar.cl index e9ea12315..dc392f0ba 100644 --- a/OpenCL/inc_scalar.cl +++ b/OpenCL/inc_scalar.cl @@ -9,7 +9,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ } \ @@ -33,7 +33,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ } \ diff --git a/OpenCL/inc_simd.h b/OpenCL/inc_simd.h index fb0cb1e5d..f30f07dc3 100644 --- a/OpenCL/inc_simd.h +++ b/OpenCL/inc_simd.h @@ -19,7 +19,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ } \ @@ -43,7 +43,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ } \ @@ -68,7 +68,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -78,7 +78,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -103,7 +103,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -123,7 +123,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -146,7 +146,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -156,7 +156,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -166,7 +166,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -176,7 +176,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -203,7 +203,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -223,7 +223,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -243,7 +243,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -263,7 +263,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -286,7 +286,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -296,7 +296,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -306,7 +306,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -316,7 +316,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -325,7 +325,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ @@ -335,7 +335,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ @@ -345,7 +345,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ @@ -355,7 +355,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ @@ -386,7 +386,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -406,7 +406,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -426,7 +426,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -446,7 +446,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -465,7 +465,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ @@ -485,7 +485,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ @@ -505,7 +505,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ @@ -525,7 +525,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ @@ -548,7 +548,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -558,7 +558,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -568,7 +568,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -578,7 +578,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -587,7 +587,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ @@ -597,7 +597,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ @@ -607,7 +607,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ @@ -617,7 +617,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ @@ -627,7 +627,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 8) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 8, 0, 0); \ } \ @@ -637,7 +637,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 9) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 9, 0, 0); \ } \ @@ -647,7 +647,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 10) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 10, 0, 0); \ } \ @@ -657,7 +657,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 11) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 11, 0, 0); \ } \ @@ -667,7 +667,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 12) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 12, 0, 0); \ } \ @@ -677,7 +677,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 13) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 13, 0, 0); \ } \ @@ -687,7 +687,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 14) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 14, 0, 0); \ } \ @@ -697,7 +697,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 15) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 15, 0, 0); \ } \ @@ -736,7 +736,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ @@ -756,7 +756,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ @@ -776,7 +776,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ @@ -796,7 +796,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ @@ -816,7 +816,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ @@ -836,7 +836,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ @@ -856,7 +856,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ @@ -876,7 +876,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ @@ -896,7 +896,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 8) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 8, 0, 0); \ } \ @@ -916,7 +916,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 9) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 9, 0, 0); \ } \ @@ -936,7 +936,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 10) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 10, 0, 0); \ } \ @@ -956,7 +956,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 11) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 11, 0, 0); \ } \ @@ -976,7 +976,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 12) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 12, 0, 0); \ } \ @@ -996,7 +996,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 13) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 13, 0, 0); \ } \ @@ -1016,7 +1016,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 14) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 14, 0, 0); \ } \ @@ -1036,7 +1036,7 @@ { \ const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 15) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 15, 0, 0); \ } \ diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 0c6218156..4c608abe0 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -171,6 +171,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b)); } @@ -224,6 +227,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1); } #endif @@ -337,6 +343,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); } @@ -390,6 +399,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } +inline __device__ u64x operator % (const u64x a, const u32 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } #endif @@ -519,6 +531,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); } @@ -572,6 +587,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } #endif @@ -733,6 +751,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); } @@ -786,6 +807,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } #endif @@ -1618,6 +1642,7 @@ typedef struct salt u32 salt_iter; u32 salt_iter2; u32 salt_sign[2]; + u32 salt_repeats; u32 orig_pos; @@ -1701,4 +1726,13 @@ typedef struct keyboard_layout_mapping } keyboard_layout_mapping_t; +typedef struct hc_enc +{ + int pos; // source offset + + u32 cbuf; // carry buffer + int clen; // carry length + +} hc_enc_t; + #endif diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 6ca2c5707..9299caa3f 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -37,10 +37,6 @@ #define KERNEL_FQ __kernel #endif -#ifndef MAYBE_VOLATILE -#define MAYBE_VOLATILE -#endif - #ifndef MAYBE_UNUSED #define MAYBE_UNUSED #endif diff --git a/OpenCL/m00000_a3-optimized.cl b/OpenCL/m00000_a3-optimized.cl index 147943deb..bc18951c9 100644 --- a/OpenCL/m00000_a3-optimized.cl +++ b/OpenCL/m00000_a3-optimized.cl @@ -464,7 +464,7 @@ KERNEL_FQ void m00000_m04 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_m08 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m00000_m08 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_m16 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m00000_m16 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s04 (KERN_ATTR_VECTOR ()) @@ -578,7 +578,7 @@ KERNEL_FQ void m00000_s04 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s08 (KERN_ATTR_VECTOR ()) @@ -616,7 +616,7 @@ KERNEL_FQ void m00000_s08 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s16 (KERN_ATTR_VECTOR ()) @@ -654,5 +654,5 @@ KERNEL_FQ void m00000_s16 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00010_a3-optimized.cl b/OpenCL/m00010_a3-optimized.cl index 406572cd8..cbea16ddb 100644 --- a/OpenCL/m00010_a3-optimized.cl +++ b/OpenCL/m00010_a3-optimized.cl @@ -513,7 +513,7 @@ KERNEL_FQ void m00010_m04 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m00010_m08 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_m16 (KERN_ATTR_VECTOR ()) @@ -589,7 +589,7 @@ KERNEL_FQ void m00010_m16 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s04 (KERN_ATTR_VECTOR ()) @@ -627,7 +627,7 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s08 (KERN_ATTR_VECTOR ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m00010_s08 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s16 (KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ KERNEL_FQ void m00010_s16 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00020_a3-optimized.cl b/OpenCL/m00020_a3-optimized.cl index a1d89b0d6..3e962a671 100644 --- a/OpenCL/m00020_a3-optimized.cl +++ b/OpenCL/m00020_a3-optimized.cl @@ -423,7 +423,7 @@ KERNEL_FQ void m00020_m04 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_m08 (KERN_ATTR_BASIC ()) @@ -470,7 +470,7 @@ KERNEL_FQ void m00020_m08 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_m16 (KERN_ATTR_BASIC ()) @@ -517,7 +517,7 @@ KERNEL_FQ void m00020_m16 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s08 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00020_s08 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s16 (KERN_ATTR_BASIC ()) @@ -658,5 +658,5 @@ KERNEL_FQ void m00020_s16 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00030_a3-optimized.cl b/OpenCL/m00030_a3-optimized.cl index 6d610bd2c..c39666ab6 100644 --- a/OpenCL/m00030_a3-optimized.cl +++ b/OpenCL/m00030_a3-optimized.cl @@ -513,7 +513,7 @@ KERNEL_FQ void m00030_m04 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m00030_m08 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_m16 (KERN_ATTR_VECTOR ()) @@ -589,7 +589,7 @@ KERNEL_FQ void m00030_m16 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s04 (KERN_ATTR_VECTOR ()) @@ -627,7 +627,7 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s08 (KERN_ATTR_VECTOR ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m00030_s08 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s16 (KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ KERNEL_FQ void m00030_s16 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00040_a3-optimized.cl b/OpenCL/m00040_a3-optimized.cl index 721712bff..5a8563b1f 100644 --- a/OpenCL/m00040_a3-optimized.cl +++ b/OpenCL/m00040_a3-optimized.cl @@ -423,7 +423,7 @@ KERNEL_FQ void m00040_m04 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_m08 (KERN_ATTR_BASIC ()) @@ -470,7 +470,7 @@ KERNEL_FQ void m00040_m08 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_m16 (KERN_ATTR_BASIC ()) @@ -517,7 +517,7 @@ KERNEL_FQ void m00040_m16 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s04 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00040_s04 (KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s08 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00040_s08 (KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s16 (KERN_ATTR_BASIC ()) @@ -658,5 +658,5 @@ KERNEL_FQ void m00040_s16 (KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00050_a3-optimized.cl b/OpenCL/m00050_a3-optimized.cl index 67dfe41b3..16fa863dd 100644 --- a/OpenCL/m00050_a3-optimized.cl +++ b/OpenCL/m00050_a3-optimized.cl @@ -365,7 +365,7 @@ KERNEL_FQ void m00050_m04 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_m08 (KERN_ATTR_BASIC ()) @@ -412,7 +412,7 @@ KERNEL_FQ void m00050_m08 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_m16 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m00050_m16 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s08 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m00050_s08 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s16 (KERN_ATTR_BASIC ()) @@ -600,5 +600,5 @@ KERNEL_FQ void m00050_s16 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00060_a3-optimized.cl b/OpenCL/m00060_a3-optimized.cl index 266b692ec..4121c3293 100644 --- a/OpenCL/m00060_a3-optimized.cl +++ b/OpenCL/m00060_a3-optimized.cl @@ -361,7 +361,7 @@ KERNEL_FQ void m00060_m04 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_m08 (KERN_ATTR_BASIC ()) @@ -408,7 +408,7 @@ KERNEL_FQ void m00060_m08 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_m16 (KERN_ATTR_BASIC ()) @@ -455,7 +455,7 @@ KERNEL_FQ void m00060_m16 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s08 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m00060_s08 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s16 (KERN_ATTR_BASIC ()) @@ -596,5 +596,5 @@ KERNEL_FQ void m00060_s16 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00070_a0-optimized.cl b/OpenCL/m00070_a0-optimized.cl new file mode 100644 index 000000000..4b4f0a915 --- /dev/null +++ b/OpenCL/m00070_a0-optimized.cl @@ -0,0 +1,320 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + w3[2] = out_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + w3[2] = out_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m00070_a0-pure.cl b/OpenCL/m00070_a0-pure.cl new file mode 100644 index 000000000..d39773fef --- /dev/null +++ b/OpenCL/m00070_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_utf16le (&ctx, tmp.i, tmp.pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_utf16le (&ctx, tmp.i, tmp.pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00070_a1-optimized.cl b/OpenCL/m00070_a1-optimized.cl new file mode 100644 index 000000000..18500db3a --- /dev/null +++ b/OpenCL/m00070_a1-optimized.cl @@ -0,0 +1,438 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_len2); + + w3[2] = pw_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_len2); + + w3[2] = pw_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m00070_a1-pure.cl b/OpenCL/m00070_a1-pure.cl new file mode 100644 index 000000000..37616a484 --- /dev/null +++ b/OpenCL/m00070_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx = ctx0; + + md5_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx = ctx0; + + md5_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00070_a3-optimized.cl b/OpenCL/m00070_a3-optimized.cl new file mode 100644 index 000000000..daff21ace --- /dev/null +++ b/OpenCL/m00070_a3-optimized.cl @@ -0,0 +1,661 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ +{ \ + a -= b; \ + a = hc_rotr32_S (a, s); \ + a -= f (b, c, d); \ + a -= x; \ + a -= t; \ +} + +#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \ +{ \ + a -= b; \ + a = hc_rotr32_S (a, s); \ + a -= x; \ + a -= t; \ +} + +DECLSPEC void m00070m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + w[14] = pw_len * 8; + w[15] = 0; + + /** + * base + */ + + const u32 F_w0c00 = 0u + MD5C00; + const u32 F_w1c01 = w[ 1] + MD5C01; + const u32 F_w2c02 = w[ 2] + MD5C02; + const u32 F_w3c03 = w[ 3] + MD5C03; + const u32 F_w4c04 = w[ 4] + MD5C04; + const u32 F_w5c05 = w[ 5] + MD5C05; + const u32 F_w6c06 = w[ 6] + MD5C06; + const u32 F_w7c07 = w[ 7] + MD5C07; + const u32 F_w8c08 = w[ 8] + MD5C08; + const u32 F_w9c09 = w[ 9] + MD5C09; + const u32 F_wac0a = w[10] + MD5C0a; + const u32 F_wbc0b = w[11] + MD5C0b; + const u32 F_wcc0c = w[12] + MD5C0c; + const u32 F_wdc0d = w[13] + MD5C0d; + const u32 F_wec0e = w[14] + MD5C0e; + const u32 F_wfc0f = w[15] + MD5C0f; + + const u32 G_w1c10 = w[ 1] + MD5C10; + const u32 G_w6c11 = w[ 6] + MD5C11; + const u32 G_wbc12 = w[11] + MD5C12; + const u32 G_w0c13 = 0u + MD5C13; + const u32 G_w5c14 = w[ 5] + MD5C14; + const u32 G_wac15 = w[10] + MD5C15; + const u32 G_wfc16 = w[15] + MD5C16; + const u32 G_w4c17 = w[ 4] + MD5C17; + const u32 G_w9c18 = w[ 9] + MD5C18; + const u32 G_wec19 = w[14] + MD5C19; + const u32 G_w3c1a = w[ 3] + MD5C1a; + const u32 G_w8c1b = w[ 8] + MD5C1b; + const u32 G_wdc1c = w[13] + MD5C1c; + const u32 G_w2c1d = w[ 2] + MD5C1d; + const u32 G_w7c1e = w[ 7] + MD5C1e; + const u32 G_wcc1f = w[12] + MD5C1f; + + const u32 H_w5c20 = w[ 5] + MD5C20; + const u32 H_w8c21 = w[ 8] + MD5C21; + const u32 H_wbc22 = w[11] + MD5C22; + const u32 H_wec23 = w[14] + MD5C23; + const u32 H_w1c24 = w[ 1] + MD5C24; + const u32 H_w4c25 = w[ 4] + MD5C25; + const u32 H_w7c26 = w[ 7] + MD5C26; + const u32 H_wac27 = w[10] + MD5C27; + const u32 H_wdc28 = w[13] + MD5C28; + const u32 H_w0c29 = 0u + MD5C29; + const u32 H_w3c2a = w[ 3] + MD5C2a; + const u32 H_w6c2b = w[ 6] + MD5C2b; + const u32 H_w9c2c = w[ 9] + MD5C2c; + const u32 H_wcc2d = w[12] + MD5C2d; + const u32 H_wfc2e = w[15] + MD5C2e; + const u32 H_w2c2f = w[ 2] + MD5C2f; + + const u32 I_w0c30 = 0u + MD5C30; + const u32 I_w7c31 = w[ 7] + MD5C31; + const u32 I_wec32 = w[14] + MD5C32; + const u32 I_w5c33 = w[ 5] + MD5C33; + const u32 I_wcc34 = w[12] + MD5C34; + const u32 I_w3c35 = w[ 3] + MD5C35; + const u32 I_wac36 = w[10] + MD5C36; + const u32 I_w1c37 = w[ 1] + MD5C37; + const u32 I_w8c38 = w[ 8] + MD5C38; + const u32 I_wfc39 = w[15] + MD5C39; + const u32 I_w6c3a = w[ 6] + MD5C3a; + const u32 I_wdc3b = w[13] + MD5C3b; + const u32 I_w4c3c = w[ 4] + MD5C3c; + const u32 I_wbc3d = w[11] + MD5C3d; + const u32 I_w2c3e = w[ 2] + MD5C3e; + const u32 I_w9c3f = w[ 9] + MD5C3f; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); + + MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); + + u32x t; + + MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +DECLSPEC void m00070s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u32 F_w0c00 = 0u + MD5C00; + const u32 F_w1c01 = w[ 1] + MD5C01; + const u32 F_w2c02 = w[ 2] + MD5C02; + const u32 F_w3c03 = w[ 3] + MD5C03; + const u32 F_w4c04 = w[ 4] + MD5C04; + const u32 F_w5c05 = w[ 5] + MD5C05; + const u32 F_w6c06 = w[ 6] + MD5C06; + const u32 F_w7c07 = w[ 7] + MD5C07; + const u32 F_w8c08 = w[ 8] + MD5C08; + const u32 F_w9c09 = w[ 9] + MD5C09; + const u32 F_wac0a = w[10] + MD5C0a; + const u32 F_wbc0b = w[11] + MD5C0b; + const u32 F_wcc0c = w[12] + MD5C0c; + const u32 F_wdc0d = w[13] + MD5C0d; + const u32 F_wec0e = w[14] + MD5C0e; + const u32 F_wfc0f = w[15] + MD5C0f; + + const u32 G_w1c10 = w[ 1] + MD5C10; + const u32 G_w6c11 = w[ 6] + MD5C11; + const u32 G_wbc12 = w[11] + MD5C12; + const u32 G_w0c13 = 0u + MD5C13; + const u32 G_w5c14 = w[ 5] + MD5C14; + const u32 G_wac15 = w[10] + MD5C15; + const u32 G_wfc16 = w[15] + MD5C16; + const u32 G_w4c17 = w[ 4] + MD5C17; + const u32 G_w9c18 = w[ 9] + MD5C18; + const u32 G_wec19 = w[14] + MD5C19; + const u32 G_w3c1a = w[ 3] + MD5C1a; + const u32 G_w8c1b = w[ 8] + MD5C1b; + const u32 G_wdc1c = w[13] + MD5C1c; + const u32 G_w2c1d = w[ 2] + MD5C1d; + const u32 G_w7c1e = w[ 7] + MD5C1e; + const u32 G_wcc1f = w[12] + MD5C1f; + + const u32 H_w5c20 = w[ 5] + MD5C20; + const u32 H_w8c21 = w[ 8] + MD5C21; + const u32 H_wbc22 = w[11] + MD5C22; + const u32 H_wec23 = w[14] + MD5C23; + const u32 H_w1c24 = w[ 1] + MD5C24; + const u32 H_w4c25 = w[ 4] + MD5C25; + const u32 H_w7c26 = w[ 7] + MD5C26; + const u32 H_wac27 = w[10] + MD5C27; + const u32 H_wdc28 = w[13] + MD5C28; + const u32 H_w0c29 = 0u + MD5C29; + const u32 H_w3c2a = w[ 3] + MD5C2a; + const u32 H_w6c2b = w[ 6] + MD5C2b; + const u32 H_w9c2c = w[ 9] + MD5C2c; + const u32 H_wcc2d = w[12] + MD5C2d; + const u32 H_wfc2e = w[15] + MD5C2e; + const u32 H_w2c2f = w[ 2] + MD5C2f; + + const u32 I_w0c30 = 0u + MD5C30; + const u32 I_w7c31 = w[ 7] + MD5C31; + const u32 I_wec32 = w[14] + MD5C32; + const u32 I_w5c33 = w[ 5] + MD5C33; + const u32 I_wcc34 = w[12] + MD5C34; + const u32 I_w3c35 = w[ 3] + MD5C35; + const u32 I_wac36 = w[10] + MD5C36; + const u32 I_w1c37 = w[ 1] + MD5C37; + const u32 I_w8c38 = w[ 8] + MD5C38; + const u32 I_wfc39 = w[15] + MD5C39; + const u32 I_w6c3a = w[ 6] + MD5C3a; + const u32 I_wdc3b = w[13] + MD5C3b; + const u32 I_w4c3c = w[ 4] + MD5C3c; + const u32 I_wbc3d = w[11] + MD5C3d; + const u32 I_w2c3e = w[ 2] + MD5C3e; + const u32 I_w9c3f = w[ 9] + MD5C3f; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); + + const u32 pre_cd = c_rev ^ d_rev; + + MD5_STEP_REV1(MD5_H_S, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); + MD5_STEP_REV1(MD5_H_S, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x pre_d = d_rev; + const u32x pre_a = a_rev - w0; + const u32x pre_b = b_rev - (pre_a ^ pre_cd); + const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); + + MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); + + u32x t; + + MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); + + if (MATCHES_NONE_VV (pre_c, c)) continue; + + MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); + + if (MATCHES_NONE_VV (pre_d, d)) continue; + + MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m00070_a3-pure.cl b/OpenCL/m00070_a3-pure.cl new file mode 100644 index 000000000..76658f980 --- /dev/null +++ b/OpenCL/m00070_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + md5_update_vector_utf16le (&ctx, w, pw_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + md5_update_vector_utf16le (&ctx, w, pw_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00100_a3-optimized.cl b/OpenCL/m00100_a3-optimized.cl index c52572d8f..cabb04e0c 100644 --- a/OpenCL/m00100_a3-optimized.cl +++ b/OpenCL/m00100_a3-optimized.cl @@ -557,7 +557,7 @@ KERNEL_FQ void m00100_m04 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_m08 (KERN_ATTR_VECTOR ()) @@ -595,7 +595,7 @@ KERNEL_FQ void m00100_m08 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_m16 (KERN_ATTR_VECTOR ()) @@ -633,7 +633,7 @@ KERNEL_FQ void m00100_m16 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s04 (KERN_ATTR_VECTOR ()) @@ -671,7 +671,7 @@ KERNEL_FQ void m00100_s04 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s08 (KERN_ATTR_VECTOR ()) @@ -709,7 +709,7 @@ KERNEL_FQ void m00100_s08 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s16 (KERN_ATTR_VECTOR ()) @@ -747,5 +747,5 @@ KERNEL_FQ void m00100_s16 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00110_a3-optimized.cl b/OpenCL/m00110_a3-optimized.cl index 889709072..b96c47fd5 100644 --- a/OpenCL/m00110_a3-optimized.cl +++ b/OpenCL/m00110_a3-optimized.cl @@ -605,7 +605,7 @@ KERNEL_FQ void m00110_m04 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_m08 (KERN_ATTR_VECTOR ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m00110_m08 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_m16 (KERN_ATTR_VECTOR ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m00110_m16 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s04 (KERN_ATTR_VECTOR ()) @@ -719,7 +719,7 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s08 (KERN_ATTR_VECTOR ()) @@ -757,7 +757,7 @@ KERNEL_FQ void m00110_s08 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s16 (KERN_ATTR_VECTOR ()) @@ -795,5 +795,5 @@ KERNEL_FQ void m00110_s16 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00120_a3-optimized.cl b/OpenCL/m00120_a3-optimized.cl index fb45e9295..37ad4b0d8 100644 --- a/OpenCL/m00120_a3-optimized.cl +++ b/OpenCL/m00120_a3-optimized.cl @@ -517,7 +517,7 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_m08 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00120_m08 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_m16 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00120_m16 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s04 (KERN_ATTR_BASIC ()) @@ -658,7 +658,7 @@ KERNEL_FQ void m00120_s04 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s08 (KERN_ATTR_BASIC ()) @@ -705,7 +705,7 @@ KERNEL_FQ void m00120_s08 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s16 (KERN_ATTR_BASIC ()) @@ -752,5 +752,5 @@ KERNEL_FQ void m00120_s16 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00130_a3-optimized.cl b/OpenCL/m00130_a3-optimized.cl index 445534cba..306a6d659 100644 --- a/OpenCL/m00130_a3-optimized.cl +++ b/OpenCL/m00130_a3-optimized.cl @@ -605,7 +605,7 @@ KERNEL_FQ void m00130_m04 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_m08 (KERN_ATTR_VECTOR ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m00130_m08 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_m16 (KERN_ATTR_VECTOR ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m00130_m16 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s04 (KERN_ATTR_VECTOR ()) @@ -719,7 +719,7 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s08 (KERN_ATTR_VECTOR ()) @@ -757,7 +757,7 @@ KERNEL_FQ void m00130_s08 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s16 (KERN_ATTR_VECTOR ()) @@ -795,5 +795,5 @@ KERNEL_FQ void m00130_s16 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00140_a3-optimized.cl b/OpenCL/m00140_a3-optimized.cl index 84910243c..12504e385 100644 --- a/OpenCL/m00140_a3-optimized.cl +++ b/OpenCL/m00140_a3-optimized.cl @@ -517,7 +517,7 @@ KERNEL_FQ void m00140_m04 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_m08 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00140_m08 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_m16 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00140_m16 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) @@ -658,7 +658,7 @@ KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s08 (KERN_ATTR_BASIC ()) @@ -705,7 +705,7 @@ KERNEL_FQ void m00140_s08 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s16 (KERN_ATTR_BASIC ()) @@ -752,5 +752,5 @@ KERNEL_FQ void m00140_s16 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00150_a3-optimized.cl b/OpenCL/m00150_a3-optimized.cl index 160f3a879..136f5bf9b 100644 --- a/OpenCL/m00150_a3-optimized.cl +++ b/OpenCL/m00150_a3-optimized.cl @@ -369,7 +369,7 @@ KERNEL_FQ void m00150_m04 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_m08 (KERN_ATTR_BASIC ()) @@ -416,7 +416,7 @@ KERNEL_FQ void m00150_m08 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_m16 (KERN_ATTR_BASIC ()) @@ -463,7 +463,7 @@ KERNEL_FQ void m00150_m16 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) @@ -510,7 +510,7 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s08 (KERN_ATTR_BASIC ()) @@ -557,7 +557,7 @@ KERNEL_FQ void m00150_s08 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s16 (KERN_ATTR_BASIC ()) @@ -604,5 +604,5 @@ KERNEL_FQ void m00150_s16 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00160_a3-optimized.cl b/OpenCL/m00160_a3-optimized.cl index 8d0b98595..743a2bda8 100644 --- a/OpenCL/m00160_a3-optimized.cl +++ b/OpenCL/m00160_a3-optimized.cl @@ -365,7 +365,7 @@ KERNEL_FQ void m00160_m04 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_m08 (KERN_ATTR_BASIC ()) @@ -412,7 +412,7 @@ KERNEL_FQ void m00160_m08 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_m16 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m00160_m16 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s08 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m00160_s08 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s16 (KERN_ATTR_BASIC ()) @@ -600,5 +600,5 @@ KERNEL_FQ void m00160_s16 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00170_a0-optimized.cl b/OpenCL/m00170_a0-optimized.cl new file mode 100644 index 000000000..451d34fe8 --- /dev/null +++ b/OpenCL/m00170_a0-optimized.cl @@ -0,0 +1,408 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00170_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00170_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00170_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m00170_a0-pure.cl b/OpenCL/m00170_a0-pure.cl new file mode 100644 index 000000000..62a0b8d0c --- /dev/null +++ b/OpenCL/m00170_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + sha1_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00170_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + sha1_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00170_a1-optimized.cl b/OpenCL/m00170_a1-optimized.cl new file mode 100644 index 000000000..1656a3aaf --- /dev/null +++ b/OpenCL/m00170_a1-optimized.cl @@ -0,0 +1,522 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00170_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00170_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00170_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m00170_a1-pure.cl b/OpenCL/m00170_a1-pure.cl new file mode 100644 index 000000000..d907f3513 --- /dev/null +++ b/OpenCL/m00170_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + sha1_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00170_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + sha1_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00170_a3-optimized.cl b/OpenCL/m00170_a3-optimized.cl new file mode 100644 index 000000000..c29b27025 --- /dev/null +++ b/OpenCL/m00170_a3-optimized.cl @@ -0,0 +1,748 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void m00170m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u32 c_16s = hc_rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); + const u32 c_17s = hc_rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); + const u32 c_18s = hc_rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); + const u32 c_19s = hc_rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); + const u32 c_20s = hc_rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); + const u32 c_21s = hc_rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); + const u32 c_22s = hc_rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); + const u32 c_23s = hc_rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); + const u32 c_24s = hc_rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); + const u32 c_25s = hc_rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); + const u32 c_26s = hc_rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); + const u32 c_27s = hc_rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); + const u32 c_28s = hc_rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); + const u32 c_29s = hc_rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); + const u32 c_30s = hc_rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); + const u32 c_31s = hc_rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); + const u32 c_32s = hc_rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); + const u32 c_33s = hc_rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); + const u32 c_34s = hc_rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); + const u32 c_35s = hc_rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); + const u32 c_36s = hc_rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); + const u32 c_37s = hc_rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); + const u32 c_38s = hc_rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); + const u32 c_39s = hc_rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); + const u32 c_40s = hc_rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); + const u32 c_41s = hc_rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); + const u32 c_42s = hc_rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); + const u32 c_43s = hc_rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); + const u32 c_44s = hc_rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); + const u32 c_45s = hc_rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); + const u32 c_46s = hc_rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); + const u32 c_47s = hc_rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); + const u32 c_48s = hc_rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); + const u32 c_49s = hc_rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); + const u32 c_50s = hc_rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); + const u32 c_51s = hc_rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); + const u32 c_52s = hc_rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); + const u32 c_53s = hc_rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); + const u32 c_54s = hc_rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); + const u32 c_55s = hc_rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); + const u32 c_56s = hc_rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); + const u32 c_57s = hc_rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); + const u32 c_58s = hc_rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); + const u32 c_59s = hc_rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); + const u32 c_60s = hc_rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); + const u32 c_61s = hc_rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); + const u32 c_62s = hc_rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); + const u32 c_63s = hc_rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); + const u32 c_64s = hc_rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); + const u32 c_65s = hc_rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); + const u32 c_66s = hc_rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); + const u32 c_67s = hc_rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); + const u32 c_68s = hc_rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); + const u32 c_69s = hc_rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); + const u32 c_70s = hc_rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); + const u32 c_71s = hc_rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); + const u32 c_72s = hc_rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); + const u32 c_73s = hc_rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); + const u32 c_74s = hc_rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); + const u32 c_75s = hc_rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); + const u32 c_76s = hc_rotl32_S ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); + const u32 c_77s = hc_rotl32_S ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); + const u32 c_78s = hc_rotl32_S ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); + const u32 c_79s = hc_rotl32_S ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); + + const u32 c_17sK = c_17s + SHA1C00; + const u32 c_18sK = c_18s + SHA1C00; + const u32 c_20sK = c_20s + SHA1C01; + const u32 c_21sK = c_21s + SHA1C01; + const u32 c_23sK = c_23s + SHA1C01; + const u32 c_26sK = c_26s + SHA1C01; + const u32 c_27sK = c_27s + SHA1C01; + const u32 c_29sK = c_29s + SHA1C01; + const u32 c_33sK = c_33s + SHA1C01; + const u32 c_39sK = c_39s + SHA1C01; + const u32 c_41sK = c_41s + SHA1C02; + const u32 c_45sK = c_45s + SHA1C02; + const u32 c_53sK = c_53s + SHA1C02; + const u32 c_65sK = c_65s + SHA1C03; + const u32 c_69sK = c_69s + SHA1C03; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x w0s01 = hc_rotl32 (w0, 1u); + const u32x w0s02 = hc_rotl32 (w0, 2u); + const u32x w0s03 = hc_rotl32 (w0, 3u); + const u32x w0s04 = hc_rotl32 (w0, 4u); + const u32x w0s05 = hc_rotl32 (w0, 5u); + const u32x w0s06 = hc_rotl32 (w0, 6u); + const u32x w0s07 = hc_rotl32 (w0, 7u); + const u32x w0s08 = hc_rotl32 (w0, 8u); + const u32x w0s09 = hc_rotl32 (w0, 9u); + const u32x w0s10 = hc_rotl32 (w0, 10u); + const u32x w0s11 = hc_rotl32 (w0, 11u); + const u32x w0s12 = hc_rotl32 (w0, 12u); + const u32x w0s13 = hc_rotl32 (w0, 13u); + const u32x w0s14 = hc_rotl32 (w0, 14u); + const u32x w0s15 = hc_rotl32 (w0, 15u); + const u32x w0s16 = hc_rotl32 (w0, 16u); + const u32x w0s17 = hc_rotl32 (w0, 17u); + const u32x w0s18 = hc_rotl32 (w0, 18u); + const u32x w0s19 = hc_rotl32 (w0, 19u); + const u32x w0s20 = hc_rotl32 (w0, 20u); + const u32x w0s21 = hc_rotl32 (w0, 21u); + const u32x w0s22 = hc_rotl32 (w0, 22U); + + const u32x w0s04___w0s06 = w0s04 ^ w0s06; + const u32x w0s04___w0s08 = w0s04 ^ w0s08; + const u32x w0s08___w0s12 = w0s08 ^ w0s12; + const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 1]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 2]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 3]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 4]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[ 5]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 6]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 7]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 8]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 9]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[10]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[11]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[12]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[13]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[14]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[15]); + + SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); + SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); + SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); + + #undef K + #define K SHA1C01 + + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); + SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); + + #undef K + #define K SHA1C02 + + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); + SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); + SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); + SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); + + #undef K + #define K SHA1C03 + + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m00170s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u32 c_16s = hc_rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); + const u32 c_17s = hc_rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); + const u32 c_18s = hc_rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); + const u32 c_19s = hc_rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); + const u32 c_20s = hc_rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); + const u32 c_21s = hc_rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); + const u32 c_22s = hc_rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); + const u32 c_23s = hc_rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); + const u32 c_24s = hc_rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); + const u32 c_25s = hc_rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); + const u32 c_26s = hc_rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); + const u32 c_27s = hc_rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); + const u32 c_28s = hc_rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); + const u32 c_29s = hc_rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); + const u32 c_30s = hc_rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); + const u32 c_31s = hc_rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); + const u32 c_32s = hc_rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); + const u32 c_33s = hc_rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); + const u32 c_34s = hc_rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); + const u32 c_35s = hc_rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); + const u32 c_36s = hc_rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); + const u32 c_37s = hc_rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); + const u32 c_38s = hc_rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); + const u32 c_39s = hc_rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); + const u32 c_40s = hc_rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); + const u32 c_41s = hc_rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); + const u32 c_42s = hc_rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); + const u32 c_43s = hc_rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); + const u32 c_44s = hc_rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); + const u32 c_45s = hc_rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); + const u32 c_46s = hc_rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); + const u32 c_47s = hc_rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); + const u32 c_48s = hc_rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); + const u32 c_49s = hc_rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); + const u32 c_50s = hc_rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); + const u32 c_51s = hc_rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); + const u32 c_52s = hc_rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); + const u32 c_53s = hc_rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); + const u32 c_54s = hc_rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); + const u32 c_55s = hc_rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); + const u32 c_56s = hc_rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); + const u32 c_57s = hc_rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); + const u32 c_58s = hc_rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); + const u32 c_59s = hc_rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); + const u32 c_60s = hc_rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); + const u32 c_61s = hc_rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); + const u32 c_62s = hc_rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); + const u32 c_63s = hc_rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); + const u32 c_64s = hc_rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); + const u32 c_65s = hc_rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); + const u32 c_66s = hc_rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); + const u32 c_67s = hc_rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); + const u32 c_68s = hc_rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); + const u32 c_69s = hc_rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); + const u32 c_70s = hc_rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); + const u32 c_71s = hc_rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); + const u32 c_72s = hc_rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); + const u32 c_73s = hc_rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); + const u32 c_74s = hc_rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); + const u32 c_75s = hc_rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); + + const u32 c_17sK = c_17s + SHA1C00; + const u32 c_18sK = c_18s + SHA1C00; + const u32 c_20sK = c_20s + SHA1C01; + const u32 c_21sK = c_21s + SHA1C01; + const u32 c_23sK = c_23s + SHA1C01; + const u32 c_26sK = c_26s + SHA1C01; + const u32 c_27sK = c_27s + SHA1C01; + const u32 c_29sK = c_29s + SHA1C01; + const u32 c_33sK = c_33s + SHA1C01; + const u32 c_39sK = c_39s + SHA1C01; + const u32 c_41sK = c_41s + SHA1C02; + const u32 c_45sK = c_45s + SHA1C02; + const u32 c_53sK = c_53s + SHA1C02; + const u32 c_65sK = c_65s + SHA1C03; + const u32 c_69sK = c_69s + SHA1C03; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x w0s01 = hc_rotl32 (w0, 1u); + const u32x w0s02 = hc_rotl32 (w0, 2u); + const u32x w0s03 = hc_rotl32 (w0, 3u); + const u32x w0s04 = hc_rotl32 (w0, 4u); + const u32x w0s05 = hc_rotl32 (w0, 5u); + const u32x w0s06 = hc_rotl32 (w0, 6u); + const u32x w0s07 = hc_rotl32 (w0, 7u); + const u32x w0s08 = hc_rotl32 (w0, 8u); + const u32x w0s09 = hc_rotl32 (w0, 9u); + const u32x w0s10 = hc_rotl32 (w0, 10u); + const u32x w0s11 = hc_rotl32 (w0, 11u); + const u32x w0s12 = hc_rotl32 (w0, 12u); + const u32x w0s13 = hc_rotl32 (w0, 13u); + const u32x w0s14 = hc_rotl32 (w0, 14u); + const u32x w0s15 = hc_rotl32 (w0, 15u); + const u32x w0s16 = hc_rotl32 (w0, 16u); + const u32x w0s17 = hc_rotl32 (w0, 17u); + const u32x w0s18 = hc_rotl32 (w0, 18u); + const u32x w0s19 = hc_rotl32 (w0, 19u); + const u32x w0s20 = hc_rotl32 (w0, 20u); + + const u32x w0s04___w0s06 = w0s04 ^ w0s06; + const u32x w0s04___w0s08 = w0s04 ^ w0s08; + const u32x w0s08___w0s12 = w0s08 ^ w0s12; + const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 1]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 2]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 3]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 4]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[ 5]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 6]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 7]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 8]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 9]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[10]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[11]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[12]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[13]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[14]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[15]); + + SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); + SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); + SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); + + #undef K + #define K SHA1C01 + + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); + SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); + + #undef K + #define K SHA1C02 + + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); + SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); + SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); + SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); + + #undef K + #define K SHA1C03 + + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + const u32x c_76s = hc_rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); + const u32x c_77s = hc_rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); + const u32x c_78s = hc_rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); + const u32x c_79s = hc_rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); + + const u32x w0s21 = hc_rotl32 (w0, 21u); + const u32x w0s22 = hc_rotl32 (w0, 22U); + + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m00170_a3-pure.cl b/OpenCL/m00170_a3-pure.cl new file mode 100644 index 000000000..635e63821 --- /dev/null +++ b/OpenCL/m00170_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + sha1_update_vector_utf16beN (&ctx, w, pw_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00170_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + sha1_update_vector_utf16beN (&ctx, w, pw_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00200_a3-optimized.cl b/OpenCL/m00200_a3-optimized.cl index bf3a0f496..d35c79372 100644 --- a/OpenCL/m00200_a3-optimized.cl +++ b/OpenCL/m00200_a3-optimized.cl @@ -381,7 +381,7 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_m08 (KERN_ATTR_VECTOR ()) @@ -419,7 +419,7 @@ KERNEL_FQ void m00200_m08 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_m16 (KERN_ATTR_VECTOR ()) @@ -457,7 +457,7 @@ KERNEL_FQ void m00200_m16 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s04 (KERN_ATTR_VECTOR ()) @@ -495,7 +495,7 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s08 (KERN_ATTR_VECTOR ()) @@ -533,7 +533,7 @@ KERNEL_FQ void m00200_s08 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s16 (KERN_ATTR_VECTOR ()) @@ -571,5 +571,5 @@ KERNEL_FQ void m00200_s16 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00300_a3-optimized.cl b/OpenCL/m00300_a3-optimized.cl index 76dd71098..9e94b7159 100644 --- a/OpenCL/m00300_a3-optimized.cl +++ b/OpenCL/m00300_a3-optimized.cl @@ -808,7 +808,7 @@ KERNEL_FQ void m00300_m04 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_m08 (KERN_ATTR_VECTOR ()) @@ -846,7 +846,7 @@ KERNEL_FQ void m00300_m08 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_m16 (KERN_ATTR_VECTOR ()) @@ -884,7 +884,7 @@ KERNEL_FQ void m00300_m16 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s04 (KERN_ATTR_VECTOR ()) @@ -922,7 +922,7 @@ KERNEL_FQ void m00300_s04 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s08 (KERN_ATTR_VECTOR ()) @@ -960,7 +960,7 @@ KERNEL_FQ void m00300_s08 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s16 (KERN_ATTR_VECTOR ()) @@ -998,5 +998,5 @@ KERNEL_FQ void m00300_s16 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00600_a3-optimized.cl b/OpenCL/m00600_a3-optimized.cl index 03946a6ed..be44646ef 100644 --- a/OpenCL/m00600_a3-optimized.cl +++ b/OpenCL/m00600_a3-optimized.cl @@ -225,7 +225,7 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_m08 (KERN_ATTR_VECTOR ()) @@ -263,7 +263,7 @@ KERNEL_FQ void m00600_m08 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_m16 (KERN_ATTR_VECTOR ()) @@ -301,7 +301,7 @@ KERNEL_FQ void m00600_m16 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s04 (KERN_ATTR_VECTOR ()) @@ -339,7 +339,7 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s08 (KERN_ATTR_VECTOR ()) @@ -377,7 +377,7 @@ KERNEL_FQ void m00600_s08 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s16 (KERN_ATTR_VECTOR ()) @@ -415,5 +415,5 @@ KERNEL_FQ void m00600_s16 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00900_a3-optimized.cl b/OpenCL/m00900_a3-optimized.cl index 28ec3cdb8..0b28df133 100644 --- a/OpenCL/m00900_a3-optimized.cl +++ b/OpenCL/m00900_a3-optimized.cl @@ -388,7 +388,7 @@ KERNEL_FQ void m00900_m04 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_m08 (KERN_ATTR_VECTOR ()) @@ -426,7 +426,7 @@ KERNEL_FQ void m00900_m08 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_m16 (KERN_ATTR_VECTOR ()) @@ -464,7 +464,7 @@ KERNEL_FQ void m00900_m16 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s04 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m00900_s04 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s08 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m00900_s08 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s16 (KERN_ATTR_VECTOR ()) @@ -578,5 +578,5 @@ KERNEL_FQ void m00900_s16 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01000_a3-optimized.cl b/OpenCL/m01000_a3-optimized.cl index 0bd0a1b42..2e6ce6117 100644 --- a/OpenCL/m01000_a3-optimized.cl +++ b/OpenCL/m01000_a3-optimized.cl @@ -388,7 +388,7 @@ KERNEL_FQ void m01000_m04 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_m08 (KERN_ATTR_VECTOR ()) @@ -426,7 +426,7 @@ KERNEL_FQ void m01000_m08 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_m16 (KERN_ATTR_VECTOR ()) @@ -464,7 +464,7 @@ KERNEL_FQ void m01000_m16 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s04 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m01000_s04 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s08 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01000_s08 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s16 (KERN_ATTR_VECTOR ()) @@ -578,5 +578,5 @@ KERNEL_FQ void m01000_s16 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01100_a3-optimized.cl b/OpenCL/m01100_a3-optimized.cl index d66873da6..f32d54081 100644 --- a/OpenCL/m01100_a3-optimized.cl +++ b/OpenCL/m01100_a3-optimized.cl @@ -550,7 +550,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ()) @@ -604,7 +604,7 @@ KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ()) @@ -658,7 +658,7 @@ KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ()) @@ -712,7 +712,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ()) * main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ()) @@ -766,7 +766,7 @@ KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ()) * main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ()) @@ -820,5 +820,5 @@ KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ()) * main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01300_a3-optimized.cl b/OpenCL/m01300_a3-optimized.cl index d6001d606..44e3ef88d 100644 --- a/OpenCL/m01300_a3-optimized.cl +++ b/OpenCL/m01300_a3-optimized.cl @@ -331,7 +331,7 @@ KERNEL_FQ void m01300_m04 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_m08 (KERN_ATTR_VECTOR ()) @@ -369,7 +369,7 @@ KERNEL_FQ void m01300_m08 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_m16 (KERN_ATTR_VECTOR ()) @@ -407,7 +407,7 @@ KERNEL_FQ void m01300_m16 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s04 (KERN_ATTR_VECTOR ()) @@ -445,7 +445,7 @@ KERNEL_FQ void m01300_s04 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s08 (KERN_ATTR_VECTOR ()) @@ -483,7 +483,7 @@ KERNEL_FQ void m01300_s08 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s16 (KERN_ATTR_VECTOR ()) @@ -521,5 +521,5 @@ KERNEL_FQ void m01300_s16 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01400_a3-optimized.cl b/OpenCL/m01400_a3-optimized.cl index 510d62d11..a794b3a0d 100644 --- a/OpenCL/m01400_a3-optimized.cl +++ b/OpenCL/m01400_a3-optimized.cl @@ -334,7 +334,7 @@ KERNEL_FQ void m01400_m04 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_m08 (KERN_ATTR_VECTOR ()) @@ -372,7 +372,7 @@ KERNEL_FQ void m01400_m08 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_m16 (KERN_ATTR_VECTOR ()) @@ -410,7 +410,7 @@ KERNEL_FQ void m01400_m16 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s04 (KERN_ATTR_VECTOR ()) @@ -448,7 +448,7 @@ KERNEL_FQ void m01400_s04 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s08 (KERN_ATTR_VECTOR ()) @@ -486,7 +486,7 @@ KERNEL_FQ void m01400_s08 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s16 (KERN_ATTR_VECTOR ()) @@ -524,5 +524,5 @@ KERNEL_FQ void m01400_s16 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01410_a3-optimized.cl b/OpenCL/m01410_a3-optimized.cl index 3fcddd5ff..893b1b850 100644 --- a/OpenCL/m01410_a3-optimized.cl +++ b/OpenCL/m01410_a3-optimized.cl @@ -385,7 +385,7 @@ KERNEL_FQ void m01410_m04 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_m08 (KERN_ATTR_VECTOR ()) @@ -423,7 +423,7 @@ KERNEL_FQ void m01410_m08 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_m16 (KERN_ATTR_VECTOR ()) @@ -461,7 +461,7 @@ KERNEL_FQ void m01410_m16 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s04 (KERN_ATTR_VECTOR ()) @@ -499,7 +499,7 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s08 (KERN_ATTR_VECTOR ()) @@ -537,7 +537,7 @@ KERNEL_FQ void m01410_s08 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s16 (KERN_ATTR_VECTOR ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m01410_s16 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01420_a3-optimized.cl b/OpenCL/m01420_a3-optimized.cl index 9e9e4f222..fddff4a44 100644 --- a/OpenCL/m01420_a3-optimized.cl +++ b/OpenCL/m01420_a3-optimized.cl @@ -493,7 +493,7 @@ KERNEL_FQ void m01420_m04 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_m08 (KERN_ATTR_BASIC ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01420_m08 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_m16 (KERN_ATTR_BASIC ()) @@ -587,7 +587,7 @@ KERNEL_FQ void m01420_m16 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s08 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m01420_s08 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s16 (KERN_ATTR_BASIC ()) @@ -728,5 +728,5 @@ KERNEL_FQ void m01420_s16 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01430_a3-optimized.cl b/OpenCL/m01430_a3-optimized.cl index 626a2b5ae..9e9aee86b 100644 --- a/OpenCL/m01430_a3-optimized.cl +++ b/OpenCL/m01430_a3-optimized.cl @@ -385,7 +385,7 @@ KERNEL_FQ void m01430_m04 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_m08 (KERN_ATTR_VECTOR ()) @@ -423,7 +423,7 @@ KERNEL_FQ void m01430_m08 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_m16 (KERN_ATTR_VECTOR ()) @@ -461,7 +461,7 @@ KERNEL_FQ void m01430_m16 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s04 (KERN_ATTR_VECTOR ()) @@ -499,7 +499,7 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s08 (KERN_ATTR_VECTOR ()) @@ -537,7 +537,7 @@ KERNEL_FQ void m01430_s08 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s16 (KERN_ATTR_VECTOR ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m01430_s16 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01440_a3-optimized.cl b/OpenCL/m01440_a3-optimized.cl index d5dc59efb..a41522434 100644 --- a/OpenCL/m01440_a3-optimized.cl +++ b/OpenCL/m01440_a3-optimized.cl @@ -493,7 +493,7 @@ KERNEL_FQ void m01440_m04 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_m08 (KERN_ATTR_BASIC ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01440_m08 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_m16 (KERN_ATTR_BASIC ()) @@ -587,7 +587,7 @@ KERNEL_FQ void m01440_m16 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s08 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m01440_s08 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s16 (KERN_ATTR_BASIC ()) @@ -728,5 +728,5 @@ KERNEL_FQ void m01440_s16 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01450_a3-optimized.cl b/OpenCL/m01450_a3-optimized.cl index c734bde4e..46231c2b1 100644 --- a/OpenCL/m01450_a3-optimized.cl +++ b/OpenCL/m01450_a3-optimized.cl @@ -381,7 +381,7 @@ KERNEL_FQ void m01450_m04 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_m08 (KERN_ATTR_BASIC ()) @@ -428,7 +428,7 @@ KERNEL_FQ void m01450_m08 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_m16 (KERN_ATTR_BASIC ()) @@ -475,7 +475,7 @@ KERNEL_FQ void m01450_m16 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) @@ -522,7 +522,7 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s08 (KERN_ATTR_BASIC ()) @@ -569,7 +569,7 @@ KERNEL_FQ void m01450_s08 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s16 (KERN_ATTR_BASIC ()) @@ -616,5 +616,5 @@ KERNEL_FQ void m01450_s16 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01460_a3-optimized.cl b/OpenCL/m01460_a3-optimized.cl index d99f06ffb..ef3ea4fd1 100644 --- a/OpenCL/m01460_a3-optimized.cl +++ b/OpenCL/m01460_a3-optimized.cl @@ -377,7 +377,7 @@ KERNEL_FQ void m01460_m04 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_m08 (KERN_ATTR_BASIC ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01460_m08 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_m16 (KERN_ATTR_BASIC ()) @@ -471,7 +471,7 @@ KERNEL_FQ void m01460_m16 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) @@ -518,7 +518,7 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s08 (KERN_ATTR_BASIC ()) @@ -565,7 +565,7 @@ KERNEL_FQ void m01460_s08 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s16 (KERN_ATTR_BASIC ()) @@ -612,5 +612,5 @@ KERNEL_FQ void m01460_s16 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01470_a0-optimized.cl b/OpenCL/m01470_a0-optimized.cl new file mode 100644 index 000000000..11fc6343c --- /dev/null +++ b/OpenCL/m01470_a0-optimized.cl @@ -0,0 +1,384 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} + +KERNEL_FQ void m01470_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01470_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m01470_a0-pure.cl b/OpenCL/m01470_a0-pure.cl new file mode 100644 index 000000000..aa1ac05d3 --- /dev/null +++ b/OpenCL/m01470_a0-pure.cl @@ -0,0 +1,121 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m01470_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx = ctx0; + + sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01470_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx = ctx0; + + sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01470_a1-optimized.cl b/OpenCL/m01470_a1-optimized.cl new file mode 100644 index 000000000..9274b8b53 --- /dev/null +++ b/OpenCL/m01470_a1-optimized.cl @@ -0,0 +1,498 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} + +KERNEL_FQ void m01470_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01470_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m01470_a1-pure.cl b/OpenCL/m01470_a1-pure.cl new file mode 100644 index 000000000..29a00f687 --- /dev/null +++ b/OpenCL/m01470_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m01470_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01470_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01470_a3-optimized.cl b/OpenCL/m01470_a3-optimized.cl new file mode 100644 index 000000000..b5bfa4155 --- /dev/null +++ b/OpenCL/m01470_a3-optimized.cl @@ -0,0 +1,634 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} + +DECLSPEC void m01470m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * sha256 + */ + + u32x w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +DECLSPEC void m01470s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * sha256 + */ + + u32x w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} diff --git a/OpenCL/m01470_a3-pure.cl b/OpenCL/m01470_a3-pure.cl new file mode 100644 index 000000000..2f4c3cf1b --- /dev/null +++ b/OpenCL/m01470_a3-pure.cl @@ -0,0 +1,145 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m01470_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector_utf16beN (&ctx, w, pw_len); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01470_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector_utf16beN (&ctx, w, pw_len); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index 76eedbb3d..d4312dbbb 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -1901,13 +1901,13 @@ KERNEL_FQ void m01500_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) #endif for (int i = 0, j = 0; i < 32; i += 8, j += 7) { - atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice)); } } @@ -1920,6 +1920,8 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * salt */ @@ -2297,6 +2299,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * salt */ diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl index 31abebaa3..62c60582d 100644 --- a/OpenCL/m01700_a3-optimized.cl +++ b/OpenCL/m01700_a3-optimized.cl @@ -297,7 +297,7 @@ KERNEL_FQ void m01700_m04 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_m08 (KERN_ATTR_VECTOR ()) @@ -335,7 +335,7 @@ KERNEL_FQ void m01700_m08 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_m16 (KERN_ATTR_VECTOR ()) @@ -373,7 +373,7 @@ KERNEL_FQ void m01700_m16 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s04 (KERN_ATTR_VECTOR ()) @@ -411,7 +411,7 @@ KERNEL_FQ void m01700_s04 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s08 (KERN_ATTR_VECTOR ()) @@ -449,7 +449,7 @@ KERNEL_FQ void m01700_s08 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s16 (KERN_ATTR_VECTOR ()) @@ -487,5 +487,5 @@ KERNEL_FQ void m01700_s16 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl index 031c8d35a..d8d03e473 100644 --- a/OpenCL/m01710_a3-optimized.cl +++ b/OpenCL/m01710_a3-optimized.cl @@ -348,7 +348,7 @@ KERNEL_FQ void m01710_m04 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_m08 (KERN_ATTR_VECTOR ()) @@ -386,7 +386,7 @@ KERNEL_FQ void m01710_m08 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_m16 (KERN_ATTR_VECTOR ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01710_m16 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s04 (KERN_ATTR_VECTOR ()) @@ -462,7 +462,7 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s08 (KERN_ATTR_VECTOR ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m01710_s08 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s16 (KERN_ATTR_VECTOR ()) @@ -538,5 +538,5 @@ KERNEL_FQ void m01710_s16 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl index e3e878e79..58b50041c 100644 --- a/OpenCL/m01720_a3-optimized.cl +++ b/OpenCL/m01720_a3-optimized.cl @@ -412,7 +412,7 @@ KERNEL_FQ void m01720_m04 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_m08 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m01720_m08 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_m16 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m01720_m16 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s08 (KERN_ATTR_BASIC ()) @@ -600,7 +600,7 @@ KERNEL_FQ void m01720_s08 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s16 (KERN_ATTR_BASIC ()) @@ -647,5 +647,5 @@ KERNEL_FQ void m01720_s16 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl index bef298ec5..e151b8195 100644 --- a/OpenCL/m01730_a3-optimized.cl +++ b/OpenCL/m01730_a3-optimized.cl @@ -348,7 +348,7 @@ KERNEL_FQ void m01730_m04 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_m08 (KERN_ATTR_VECTOR ()) @@ -386,7 +386,7 @@ KERNEL_FQ void m01730_m08 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_m16 (KERN_ATTR_VECTOR ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01730_m16 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_s04 (KERN_ATTR_VECTOR ()) @@ -462,7 +462,7 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_s08 (KERN_ATTR_VECTOR ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m01730_s08 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_s16 (KERN_ATTR_VECTOR ()) @@ -538,5 +538,5 @@ KERNEL_FQ void m01730_s16 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl index ec0a84069..bc93dc5c2 100644 --- a/OpenCL/m01740_a3-optimized.cl +++ b/OpenCL/m01740_a3-optimized.cl @@ -412,7 +412,7 @@ KERNEL_FQ void m01740_m04 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_m08 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m01740_m08 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_m16 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m01740_m16 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s08 (KERN_ATTR_BASIC ()) @@ -600,7 +600,7 @@ KERNEL_FQ void m01740_s08 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s16 (KERN_ATTR_BASIC ()) @@ -647,5 +647,5 @@ KERNEL_FQ void m01740_s16 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01750_a3-optimized.cl b/OpenCL/m01750_a3-optimized.cl index 358408cbe..01a42f37b 100644 --- a/OpenCL/m01750_a3-optimized.cl +++ b/OpenCL/m01750_a3-optimized.cl @@ -465,7 +465,7 @@ KERNEL_FQ void m01750_m04 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_m08 (KERN_ATTR_BASIC ()) @@ -512,7 +512,7 @@ KERNEL_FQ void m01750_m08 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_m16 (KERN_ATTR_BASIC ()) @@ -559,7 +559,7 @@ KERNEL_FQ void m01750_m16 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) @@ -606,7 +606,7 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s08 (KERN_ATTR_BASIC ()) @@ -653,7 +653,7 @@ KERNEL_FQ void m01750_s08 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s16 (KERN_ATTR_BASIC ()) @@ -700,5 +700,5 @@ KERNEL_FQ void m01750_s16 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01760_a3-optimized.cl b/OpenCL/m01760_a3-optimized.cl index d7de90585..a317932a2 100644 --- a/OpenCL/m01760_a3-optimized.cl +++ b/OpenCL/m01760_a3-optimized.cl @@ -461,7 +461,7 @@ KERNEL_FQ void m01760_m04 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_m08 (KERN_ATTR_BASIC ()) @@ -508,7 +508,7 @@ KERNEL_FQ void m01760_m08 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_m16 (KERN_ATTR_BASIC ()) @@ -555,7 +555,7 @@ KERNEL_FQ void m01760_m16 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC ()) @@ -602,7 +602,7 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC ()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s08 (KERN_ATTR_BASIC ()) @@ -649,7 +649,7 @@ KERNEL_FQ void m01760_s08 (KERN_ATTR_BASIC ()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s16 (KERN_ATTR_BASIC ()) @@ -696,5 +696,5 @@ KERNEL_FQ void m01760_s16 (KERN_ATTR_BASIC ()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01800-optimized.cl b/OpenCL/m01800-optimized.cl index 4625d9a8c..633ed4632 100644 --- a/OpenCL/m01800-optimized.cl +++ b/OpenCL/m01800-optimized.cl @@ -188,7 +188,7 @@ KERNEL_FQ void m01800_init (KERN_ATTR_TMPS (sha512crypt_tmp_t)) w0[2] = pws[gid].i[2]; w0[3] = pws[gid].i[3]; - const u32 pw_len = pws[gid].pw_len & 63; + const u32 pw_len = pws[gid].pw_len & 15; /** * salt @@ -315,7 +315,7 @@ KERNEL_FQ void m01800_loop (KERN_ATTR_TMPS (sha512crypt_tmp_t)) l_p_bytes0[0] = tmps[gid].l_p_bytes[0]; l_p_bytes0[1] = tmps[gid].l_p_bytes[1]; - const u32 pw_len = pws[gid].pw_len & 63; + const u32 pw_len = pws[gid].pw_len & 15; u64 l_s_bytes0[2]; diff --git a/OpenCL/m02400_a3-optimized.cl b/OpenCL/m02400_a3-optimized.cl index b2d5ea5cc..bea333e54 100644 --- a/OpenCL/m02400_a3-optimized.cl +++ b/OpenCL/m02400_a3-optimized.cl @@ -494,7 +494,7 @@ KERNEL_FQ void m02400_m04 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_m08 (KERN_ATTR_VECTOR ()) @@ -532,7 +532,7 @@ KERNEL_FQ void m02400_m08 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_m16 (KERN_ATTR_VECTOR ()) @@ -570,7 +570,7 @@ KERNEL_FQ void m02400_m16 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s04 (KERN_ATTR_VECTOR ()) @@ -608,7 +608,7 @@ KERNEL_FQ void m02400_s04 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s08 (KERN_ATTR_VECTOR ()) @@ -646,7 +646,7 @@ KERNEL_FQ void m02400_s08 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s16 (KERN_ATTR_VECTOR ()) @@ -684,5 +684,5 @@ KERNEL_FQ void m02400_s16 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m02410_a3-optimized.cl b/OpenCL/m02410_a3-optimized.cl index 3615f5b27..96952ef70 100644 --- a/OpenCL/m02410_a3-optimized.cl +++ b/OpenCL/m02410_a3-optimized.cl @@ -592,7 +592,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_m08 (KERN_ATTR_VECTOR ()) @@ -630,7 +630,7 @@ KERNEL_FQ void m02410_m08 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_m16 (KERN_ATTR_VECTOR ()) @@ -668,7 +668,7 @@ KERNEL_FQ void m02410_m16 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s04 (KERN_ATTR_VECTOR ()) @@ -706,7 +706,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s08 (KERN_ATTR_VECTOR ()) @@ -744,7 +744,7 @@ KERNEL_FQ void m02410_s08 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s16 (KERN_ATTR_VECTOR ()) @@ -782,5 +782,5 @@ KERNEL_FQ void m02410_s16 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl index aadc503ec..ed74c9c56 100644 --- a/OpenCL/m02500-pure.cl +++ b/OpenCL/m02500-pure.cl @@ -466,7 +466,7 @@ KERNEL_FQ void m02500_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -646,7 +646,7 @@ KERNEL_FQ void m02500_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -936,7 +936,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (keymic[2] == wpa_eapol->keymic[2]) && (keymic[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl index fc140b9a7..dcf8e66f8 100644 --- a/OpenCL/m02501-pure.cl +++ b/OpenCL/m02501-pure.cl @@ -336,7 +336,7 @@ KERNEL_FQ void m02501_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -516,7 +516,7 @@ KERNEL_FQ void m02501_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -802,7 +802,7 @@ KERNEL_FQ void m02501_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (keymic[2] == wpa_eapol->keymic[2]) && (keymic[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl index e8a8b7757..434a2d9ea 100644 --- a/OpenCL/m02610_a3-optimized.cl +++ b/OpenCL/m02610_a3-optimized.cl @@ -625,7 +625,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ()) @@ -695,7 +695,7 @@ KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ()) @@ -975,5 +975,5 @@ KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl index ea1992a00..750e462dd 100644 --- a/OpenCL/m02710_a3-optimized.cl +++ b/OpenCL/m02710_a3-optimized.cl @@ -794,7 +794,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ()) @@ -864,7 +864,7 @@ KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ()) @@ -934,7 +934,7 @@ KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) @@ -1004,7 +1004,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ()) @@ -1074,7 +1074,7 @@ KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ()) @@ -1144,5 +1144,5 @@ KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl index 7d2899f9c..ae295de95 100644 --- a/OpenCL/m02810_a3-optimized.cl +++ b/OpenCL/m02810_a3-optimized.cl @@ -792,7 +792,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ()) @@ -862,7 +862,7 @@ KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ()) @@ -932,7 +932,7 @@ KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ()) @@ -1072,7 +1072,7 @@ KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ()) @@ -1142,5 +1142,5 @@ KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl index 1dff1dc51..da58c34df 100644 --- a/OpenCL/m03000_a3-pure.cl +++ b/OpenCL/m03000_a3-pure.cl @@ -1741,14 +1741,14 @@ KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) for (int i = 0; i < 32; i += 8) { - atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); } } @@ -1761,6 +1761,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * base */ @@ -2138,6 +2140,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * digest */ diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl index ce6e79159..3243060b8 100644 --- a/OpenCL/m03100_a3-optimized.cl +++ b/OpenCL/m03100_a3-optimized.cl @@ -468,7 +468,7 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_VECTOR ()) * main */ - m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ()) * main */ - m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_m16 (KERN_ATTR_VECTOR ()) @@ -638,7 +638,7 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_VECTOR ()) * main */ - m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ()) @@ -721,7 +721,7 @@ KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ()) * main */ - m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_s16 (KERN_ATTR_VECTOR ()) diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl index 4b62cb989..48f9dc8b4 100644 --- a/OpenCL/m03710_a3-optimized.cl +++ b/OpenCL/m03710_a3-optimized.cl @@ -686,7 +686,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ()) @@ -756,7 +756,7 @@ KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_m16 (KERN_ATTR_BASIC ()) @@ -826,7 +826,7 @@ KERNEL_FQ void m03710_m16 (KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) @@ -896,7 +896,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ()) @@ -966,7 +966,7 @@ KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ()) @@ -1036,5 +1036,5 @@ KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m03800_a3-optimized.cl b/OpenCL/m03800_a3-optimized.cl index f21b88389..cd16286e1 100644 --- a/OpenCL/m03800_a3-optimized.cl +++ b/OpenCL/m03800_a3-optimized.cl @@ -527,7 +527,7 @@ KERNEL_FQ void m03800_m04 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_m08 (KERN_ATTR_BASIC ()) @@ -580,7 +580,7 @@ KERNEL_FQ void m03800_m08 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_m16 (KERN_ATTR_BASIC ()) @@ -633,7 +633,7 @@ KERNEL_FQ void m03800_m16 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) @@ -686,7 +686,7 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s08 (KERN_ATTR_BASIC ()) @@ -739,7 +739,7 @@ KERNEL_FQ void m03800_s08 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s16 (KERN_ATTR_BASIC ()) @@ -792,5 +792,5 @@ KERNEL_FQ void m03800_s16 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl index 085aa4177..c0af5aa66 100644 --- a/OpenCL/m03910_a3-optimized.cl +++ b/OpenCL/m03910_a3-optimized.cl @@ -792,7 +792,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ()) @@ -862,7 +862,7 @@ KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ()) @@ -932,7 +932,7 @@ KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ()) @@ -1072,7 +1072,7 @@ KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ()) @@ -1142,5 +1142,5 @@ KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl index 617d13dd2..a92825f6e 100644 --- a/OpenCL/m04010_a3-optimized.cl +++ b/OpenCL/m04010_a3-optimized.cl @@ -726,7 +726,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ()) @@ -796,7 +796,7 @@ KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ()) @@ -866,7 +866,7 @@ KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) @@ -936,7 +936,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ()) @@ -1006,7 +1006,7 @@ KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ()) @@ -1076,5 +1076,5 @@ KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl index cd71eca37..de008fcf7 100644 --- a/OpenCL/m04110_a3-optimized.cl +++ b/OpenCL/m04110_a3-optimized.cl @@ -782,7 +782,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ()) @@ -852,7 +852,7 @@ KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ()) @@ -922,7 +922,7 @@ KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) @@ -992,7 +992,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ()) @@ -1062,7 +1062,7 @@ KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ()) @@ -1132,5 +1132,5 @@ KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl index 8dea3f6b9..d638fd9d6 100644 --- a/OpenCL/m04310_a3-optimized.cl +++ b/OpenCL/m04310_a3-optimized.cl @@ -625,7 +625,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ()) @@ -695,7 +695,7 @@ KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ()) @@ -975,5 +975,5 @@ KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl index 3667e4ca2..37a39783b 100644 --- a/OpenCL/m04400_a3-optimized.cl +++ b/OpenCL/m04400_a3-optimized.cl @@ -631,7 +631,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ()) @@ -701,7 +701,7 @@ KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ()) @@ -771,7 +771,7 @@ KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ()) @@ -841,7 +841,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ()) @@ -911,7 +911,7 @@ KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ()) @@ -981,5 +981,5 @@ KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl index 8aca9f7bd..c33835c6f 100644 --- a/OpenCL/m04500_a3-optimized.cl +++ b/OpenCL/m04500_a3-optimized.cl @@ -690,7 +690,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ()) @@ -760,7 +760,7 @@ KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ()) @@ -830,7 +830,7 @@ KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ()) @@ -900,7 +900,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ()) @@ -970,7 +970,7 @@ KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ()) @@ -1040,5 +1040,5 @@ KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04510_a0-pure.cl b/OpenCL/m04510_a0-pure.cl index 0f5b29315..46853da05 100644 --- a/OpenCL/m04510_a0-pure.cl +++ b/OpenCL/m04510_a0-pure.cl @@ -28,7 +28,7 @@ #define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) #endif -KERNEL_FQ void m04510_mxx (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m04510_mxx (KERN_ATTR_RULES ()) { /** * modifier diff --git a/OpenCL/m04510_a3-optimized.cl b/OpenCL/m04510_a3-optimized.cl index f6adfd732..2fd64e49a 100644 --- a/OpenCL/m04510_a3-optimized.cl +++ b/OpenCL/m04510_a3-optimized.cl @@ -2016,7 +2016,7 @@ KERNEL_FQ void m04510_m04 (KERN_ATTR_BASIC ()) * main */ - m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04510_m08 (KERN_ATTR_BASIC ()) @@ -2086,7 +2086,7 @@ KERNEL_FQ void m04510_m08 (KERN_ATTR_BASIC ()) * main */ - m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04510_m16 (KERN_ATTR_BASIC ()) @@ -2156,7 +2156,7 @@ KERNEL_FQ void m04510_m16 (KERN_ATTR_BASIC ()) * main */ - m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04510_s04 (KERN_ATTR_BASIC ()) @@ -2226,7 +2226,7 @@ KERNEL_FQ void m04510_s04 (KERN_ATTR_BASIC ()) * main */ - m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04510_s08 (KERN_ATTR_BASIC ()) @@ -2296,7 +2296,7 @@ KERNEL_FQ void m04510_s08 (KERN_ATTR_BASIC ()) * main */ - m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04510_s16 (KERN_ATTR_BASIC ()) @@ -2366,5 +2366,5 @@ KERNEL_FQ void m04510_s16 (KERN_ATTR_BASIC ()) * main */ - m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl index 3190da297..a5015e054 100644 --- a/OpenCL/m04520_a3-optimized.cl +++ b/OpenCL/m04520_a3-optimized.cl @@ -1143,7 +1143,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ()) @@ -1213,7 +1213,7 @@ KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ()) @@ -1283,7 +1283,7 @@ KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) @@ -1353,7 +1353,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s08 (KERN_ATTR_BASIC ()) @@ -1423,7 +1423,7 @@ KERNEL_FQ void m04520_s08 (KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ()) @@ -1493,5 +1493,5 @@ KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl index 2d3a34f10..db37ac820 100644 --- a/OpenCL/m04700_a3-optimized.cl +++ b/OpenCL/m04700_a3-optimized.cl @@ -631,7 +631,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ()) @@ -701,7 +701,7 @@ KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ()) @@ -771,7 +771,7 @@ KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ()) @@ -841,7 +841,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ()) @@ -911,7 +911,7 @@ KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ()) @@ -981,5 +981,5 @@ KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04710_a3-optimized.cl b/OpenCL/m04710_a3-optimized.cl index 1f15824ec..767f2a2ce 100644 --- a/OpenCL/m04710_a3-optimized.cl +++ b/OpenCL/m04710_a3-optimized.cl @@ -1441,7 +1441,7 @@ KERNEL_FQ void m04710_m04 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_m08 (KERN_ATTR_BASIC ()) @@ -1511,7 +1511,7 @@ KERNEL_FQ void m04710_m08 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_m16 (KERN_ATTR_BASIC ()) @@ -1581,7 +1581,7 @@ KERNEL_FQ void m04710_m16 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) @@ -1651,7 +1651,7 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_s08 (KERN_ATTR_BASIC ()) @@ -1721,7 +1721,7 @@ KERNEL_FQ void m04710_s08 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_s16 (KERN_ATTR_BASIC ()) @@ -1791,5 +1791,5 @@ KERNEL_FQ void m04710_s16 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04710_a3-pure.cl b/OpenCL/m04710_a3-pure.cl index 950c434d3..ab776db6a 100644 --- a/OpenCL/m04710_a3-pure.cl +++ b/OpenCL/m04710_a3-pure.cl @@ -211,7 +211,7 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_VECTOR ()) const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32x s[8] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { diff --git a/OpenCL/m04800_a3-optimized.cl b/OpenCL/m04800_a3-optimized.cl index 4d237db53..70ab9aded 100644 --- a/OpenCL/m04800_a3-optimized.cl +++ b/OpenCL/m04800_a3-optimized.cl @@ -447,7 +447,7 @@ KERNEL_FQ void m04800_m04 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_m08 (KERN_ATTR_BASIC ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m04800_m08 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_m16 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m04800_m16 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) @@ -606,7 +606,7 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_s08 (KERN_ATTR_BASIC ()) @@ -659,7 +659,7 @@ KERNEL_FQ void m04800_s08 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_s16 (KERN_ATTR_BASIC ()) @@ -712,5 +712,5 @@ KERNEL_FQ void m04800_s16 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m04900_a3-optimized.cl b/OpenCL/m04900_a3-optimized.cl index 767bcc13f..6944cc100 100644 --- a/OpenCL/m04900_a3-optimized.cl +++ b/OpenCL/m04900_a3-optimized.cl @@ -619,7 +619,7 @@ KERNEL_FQ void m04900_m04 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_m08 (KERN_ATTR_BASIC ()) @@ -672,7 +672,7 @@ KERNEL_FQ void m04900_m08 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_m16 (KERN_ATTR_BASIC ()) @@ -725,7 +725,7 @@ KERNEL_FQ void m04900_m16 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s08 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m04900_s08 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s16 (KERN_ATTR_BASIC ()) @@ -884,5 +884,5 @@ KERNEL_FQ void m04900_s16 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m05100_a3-optimized.cl b/OpenCL/m05100_a3-optimized.cl index 54ee788eb..46936de70 100644 --- a/OpenCL/m05100_a3-optimized.cl +++ b/OpenCL/m05100_a3-optimized.cl @@ -340,7 +340,7 @@ KERNEL_FQ void m05100_m04 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_m08 (KERN_ATTR_BASIC ()) @@ -387,7 +387,7 @@ KERNEL_FQ void m05100_m08 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_m16 (KERN_ATTR_BASIC ()) @@ -434,7 +434,7 @@ KERNEL_FQ void m05100_m16 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s04 (KERN_ATTR_BASIC ()) @@ -481,7 +481,7 @@ KERNEL_FQ void m05100_s04 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s08 (KERN_ATTR_BASIC ()) @@ -528,7 +528,7 @@ KERNEL_FQ void m05100_s08 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s16 (KERN_ATTR_BASIC ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m05100_s16 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl index fe0ea87ce..6ffe822bd 100644 --- a/OpenCL/m05300_a3-optimized.cl +++ b/OpenCL/m05300_a3-optimized.cl @@ -485,7 +485,7 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -558,7 +558,7 @@ KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -631,7 +631,7 @@ KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) @@ -704,7 +704,7 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -777,7 +777,7 @@ KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -850,5 +850,5 @@ KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } diff --git a/OpenCL/m05400_a3-optimized.cl b/OpenCL/m05400_a3-optimized.cl index 196ab15b1..52b14244c 100644 --- a/OpenCL/m05400_a3-optimized.cl +++ b/OpenCL/m05400_a3-optimized.cl @@ -489,7 +489,7 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -562,7 +562,7 @@ KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -635,7 +635,7 @@ KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) @@ -708,7 +708,7 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -781,7 +781,7 @@ KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -854,5 +854,5 @@ KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl index 52c7dd0eb..f68221f98 100644 --- a/OpenCL/m05500_a3-optimized.cl +++ b/OpenCL/m05500_a3-optimized.cl @@ -913,7 +913,7 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) @@ -996,7 +996,7 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) @@ -1079,7 +1079,7 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) @@ -1162,7 +1162,7 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) @@ -1245,7 +1245,7 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) @@ -1328,5 +1328,5 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl index 541be42ba..cb6d6fa93 100644 --- a/OpenCL/m05600_a3-optimized.cl +++ b/OpenCL/m05600_a3-optimized.cl @@ -606,7 +606,7 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t)) @@ -679,7 +679,7 @@ KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t)) @@ -752,7 +752,7 @@ KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) @@ -825,7 +825,7 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t)) @@ -898,7 +898,7 @@ KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t)) @@ -971,5 +971,5 @@ KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } diff --git a/OpenCL/m06000_a3-optimized.cl b/OpenCL/m06000_a3-optimized.cl index 0b88557b3..6dd6b9bca 100644 --- a/OpenCL/m06000_a3-optimized.cl +++ b/OpenCL/m06000_a3-optimized.cl @@ -191,7 +191,7 @@ KERNEL_FQ void m06000_m04 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_m08 (KERN_ATTR_BASIC ()) @@ -238,7 +238,7 @@ KERNEL_FQ void m06000_m08 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_m16 (KERN_ATTR_BASIC ()) @@ -285,7 +285,7 @@ KERNEL_FQ void m06000_m16 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s04 (KERN_ATTR_BASIC ()) @@ -332,7 +332,7 @@ KERNEL_FQ void m06000_s04 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m06000_s08 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s16 (KERN_ATTR_BASIC ()) @@ -426,5 +426,5 @@ KERNEL_FQ void m06000_s16 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index 64f1db547..8b192c575 100644 --- a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -261,7 +261,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) @@ -356,7 +356,7 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_m16 (KERN_ATTR_BASIC ()) @@ -455,7 +455,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) @@ -550,7 +550,7 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl index 5b69e9299..e5886a5ae 100644 --- a/OpenCL/m06211-pure.cl +++ b/OpenCL/m06211-pure.cl @@ -376,7 +376,7 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -384,7 +384,7 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -392,7 +392,7 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl index 91472f768..3e4a1b759 100644 --- a/OpenCL/m06212-pure.cl +++ b/OpenCL/m06212-pure.cl @@ -376,7 +376,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -384,7 +384,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -392,7 +392,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -422,7 +422,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -430,7 +430,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -438,7 +438,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl index 77a436b0a..87961685d 100644 --- a/OpenCL/m06213-pure.cl +++ b/OpenCL/m06213-pure.cl @@ -376,7 +376,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -384,7 +384,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -392,7 +392,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -422,7 +422,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -430,7 +430,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -438,7 +438,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -468,7 +468,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -476,7 +476,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl index bcfd2e90a..fbd139fe0 100644 --- a/OpenCL/m06221-pure.cl +++ b/OpenCL/m06221-pure.cl @@ -520,7 +520,7 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -528,7 +528,7 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -536,7 +536,7 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl index 4e5b257bf..78bd93c9b 100644 --- a/OpenCL/m06222-pure.cl +++ b/OpenCL/m06222-pure.cl @@ -520,7 +520,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -528,7 +528,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -536,7 +536,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -566,7 +566,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -574,7 +574,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -582,7 +582,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl index f9c41f8bd..fc1312acb 100644 --- a/OpenCL/m06223-pure.cl +++ b/OpenCL/m06223-pure.cl @@ -520,7 +520,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -528,7 +528,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -536,7 +536,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -566,7 +566,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -574,7 +574,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -582,7 +582,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -612,7 +612,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -620,7 +620,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 15ce42c02..a2f917dcc 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -660,7 +660,7 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -668,7 +668,7 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -676,7 +676,7 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index cf7509fe2..7baf3df06 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -660,7 +660,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -668,7 +668,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -676,7 +676,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -706,7 +706,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -714,7 +714,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -722,7 +722,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index 782a9d61b..aec511bb5 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -660,7 +660,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -668,7 +668,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -676,7 +676,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -706,7 +706,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -714,7 +714,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -722,7 +722,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -752,7 +752,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -760,7 +760,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl index 9fb03b660..56d50c039 100644 --- a/OpenCL/m06800-pure.cl +++ b/OpenCL/m06800-pure.cl @@ -374,7 +374,7 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t)) && (out[2] == salt_buf[2]) && (out[3] == salt_buf[3])) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl index 4fa77218a..289cda5bc 100644 --- a/OpenCL/m06900_a3-optimized.cl +++ b/OpenCL/m06900_a3-optimized.cl @@ -1122,7 +1122,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ()) * main */ - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_tables); + m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ()) @@ -1191,7 +1191,7 @@ KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ()) * main */ - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_tables); + m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_m16 (KERN_ATTR_BASIC ()) @@ -1264,7 +1264,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ()) * main */ - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_tables); + m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ()) @@ -1333,7 +1333,7 @@ KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ()) * main */ - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_tables); + m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07000_a3-optimized.cl b/OpenCL/m07000_a3-optimized.cl index 50d6198be..71d52a4e8 100644 --- a/OpenCL/m07000_a3-optimized.cl +++ b/OpenCL/m07000_a3-optimized.cl @@ -619,7 +619,7 @@ KERNEL_FQ void m07000_m04 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_m08 (KERN_ATTR_BASIC ()) @@ -672,7 +672,7 @@ KERNEL_FQ void m07000_m08 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_m16 (KERN_ATTR_BASIC ()) @@ -725,7 +725,7 @@ KERNEL_FQ void m07000_m16 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s08 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m07000_s08 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s16 (KERN_ATTR_BASIC ()) @@ -884,5 +884,5 @@ KERNEL_FQ void m07000_s16 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m07300_a3-optimized.cl b/OpenCL/m07300_a3-optimized.cl index b3609e5e4..aec06f89b 100644 --- a/OpenCL/m07300_a3-optimized.cl +++ b/OpenCL/m07300_a3-optimized.cl @@ -405,7 +405,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_m08 (KERN_ATTR_ESALT (rakp_t)) @@ -471,7 +471,7 @@ KERNEL_FQ void m07300_m08 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_m16 (KERN_ATTR_ESALT (rakp_t)) @@ -537,7 +537,7 @@ KERNEL_FQ void m07300_m16 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) @@ -603,7 +603,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s08 (KERN_ATTR_ESALT (rakp_t)) @@ -669,7 +669,7 @@ KERNEL_FQ void m07300_s08 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s16 (KERN_ATTR_ESALT (rakp_t)) @@ -735,5 +735,5 @@ KERNEL_FQ void m07300_s16 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl index 2ee1a980d..7ecff8847 100644 --- a/OpenCL/m07500_a0-optimized.cl +++ b/OpenCL/m07500_a0-optimized.cl @@ -485,7 +485,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -591,7 +591,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl index 7aab83068..4981b23bd 100644 --- a/OpenCL/m07500_a0-pure.cl +++ b/OpenCL/m07500_a0-pure.cl @@ -342,7 +342,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -413,7 +413,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl index b29fa42cd..7ba0b483a 100644 --- a/OpenCL/m07500_a1-optimized.cl +++ b/OpenCL/m07500_a1-optimized.cl @@ -533,7 +533,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -689,7 +689,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl index f843ced12..1d1873f81 100644 --- a/OpenCL/m07500_a1-pure.cl +++ b/OpenCL/m07500_a1-pure.cl @@ -338,7 +338,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -407,7 +407,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl index 2bb5398a3..7a1739825 100644 --- a/OpenCL/m07500_a3-optimized.cl +++ b/OpenCL/m07500_a3-optimized.cl @@ -475,7 +475,7 @@ DECLSPEC void m07500 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -532,7 +532,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t)) @@ -584,7 +584,7 @@ KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_m16 (KERN_ATTR_ESALT (krb5pa_t)) @@ -640,7 +640,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t)) @@ -692,7 +692,7 @@ KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_s16 (KERN_ATTR_ESALT (krb5pa_t)) diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl index 79792aa16..602678806 100644 --- a/OpenCL/m07500_a3-pure.cl +++ b/OpenCL/m07500_a3-pure.cl @@ -363,7 +363,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -457,7 +457,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl index 0f76627a6..53dbb1fe2 100644 --- a/OpenCL/m07700_a3-optimized.cl +++ b/OpenCL/m07700_a3-optimized.cl @@ -454,7 +454,7 @@ KERNEL_FQ void m07700_m04 (KERN_ATTR_BASIC ()) * main */ - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_m08 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m07700_m08 (KERN_ATTR_BASIC ()) * main */ - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_m16 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_BASIC ()) * main */ - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_s08 (KERN_ATTR_BASIC ()) @@ -617,7 +617,7 @@ KERNEL_FQ void m07700_s08 (KERN_ATTR_BASIC ()) * main */ - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl index a2fa0d081..ae6762e90 100644 --- a/OpenCL/m07701_a3-optimized.cl +++ b/OpenCL/m07701_a3-optimized.cl @@ -454,7 +454,7 @@ KERNEL_FQ void m07701_m04 (KERN_ATTR_BASIC ()) * main */ - m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_m08 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m07701_m08 (KERN_ATTR_BASIC ()) * main */ - m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_m16 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_BASIC ()) * main */ - m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_s08 (KERN_ATTR_BASIC ()) @@ -617,7 +617,7 @@ KERNEL_FQ void m07701_s08 (KERN_ATTR_BASIC ()) * main */ - m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07800_a3-optimized.cl b/OpenCL/m07800_a3-optimized.cl index 0bc73951d..71bc23077 100644 --- a/OpenCL/m07800_a3-optimized.cl +++ b/OpenCL/m07800_a3-optimized.cl @@ -554,7 +554,7 @@ KERNEL_FQ void m07800_m04 (KERN_ATTR_BASIC ()) * main */ - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_m08 (KERN_ATTR_BASIC ()) @@ -607,7 +607,7 @@ KERNEL_FQ void m07800_m08 (KERN_ATTR_BASIC ()) * main */ - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_m16 (KERN_ATTR_BASIC ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_BASIC ()) * main */ - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_s08 (KERN_ATTR_BASIC ()) @@ -717,7 +717,7 @@ KERNEL_FQ void m07800_s08 (KERN_ATTR_BASIC ()) * main */ - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl index b2fc638f5..564c0daf9 100644 --- a/OpenCL/m07801_a3-optimized.cl +++ b/OpenCL/m07801_a3-optimized.cl @@ -554,7 +554,7 @@ KERNEL_FQ void m07801_m04 (KERN_ATTR_BASIC ()) * main */ - m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_m08 (KERN_ATTR_BASIC ()) @@ -607,7 +607,7 @@ KERNEL_FQ void m07801_m08 (KERN_ATTR_BASIC ()) * main */ - m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_m16 (KERN_ATTR_BASIC ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_BASIC ()) * main */ - m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_s08 (KERN_ATTR_BASIC ()) @@ -717,7 +717,7 @@ KERNEL_FQ void m07801_s08 (KERN_ATTR_BASIC ()) * main */ - m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl index de2cedd33..2fe762be3 100644 --- a/OpenCL/m08000_a3-optimized.cl +++ b/OpenCL/m08000_a3-optimized.cl @@ -508,7 +508,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_m08 (KERN_ATTR_VECTOR ()) @@ -547,7 +547,7 @@ KERNEL_FQ void m08000_m08 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_m16 (KERN_ATTR_VECTOR ()) @@ -586,7 +586,7 @@ KERNEL_FQ void m08000_m16 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s04 (KERN_ATTR_VECTOR ()) @@ -625,7 +625,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s08 (KERN_ATTR_VECTOR ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m08000_s08 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s16 (KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ KERNEL_FQ void m08000_s16 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08100_a3-optimized.cl b/OpenCL/m08100_a3-optimized.cl index 2f31d38d7..37d413f93 100644 --- a/OpenCL/m08100_a3-optimized.cl +++ b/OpenCL/m08100_a3-optimized.cl @@ -418,7 +418,7 @@ KERNEL_FQ void m08100_m04 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_m08 (KERN_ATTR_BASIC ()) @@ -490,7 +490,7 @@ KERNEL_FQ void m08100_m08 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_m16 (KERN_ATTR_BASIC ()) @@ -577,7 +577,7 @@ KERNEL_FQ void m08100_m16 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) @@ -641,7 +641,7 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s08 (KERN_ATTR_BASIC ()) @@ -713,7 +713,7 @@ KERNEL_FQ void m08100_s08 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s16 (KERN_ATTR_BASIC ()) @@ -800,5 +800,5 @@ KERNEL_FQ void m08100_s16 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08300_a3-optimized.cl b/OpenCL/m08300_a3-optimized.cl index d24d9f5c8..1c9dee828 100644 --- a/OpenCL/m08300_a3-optimized.cl +++ b/OpenCL/m08300_a3-optimized.cl @@ -701,7 +701,7 @@ KERNEL_FQ void m08300_m04 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_m08 (KERN_ATTR_BASIC ()) @@ -748,7 +748,7 @@ KERNEL_FQ void m08300_m08 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_m16 (KERN_ATTR_BASIC ()) @@ -795,7 +795,7 @@ KERNEL_FQ void m08300_m16 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) @@ -842,7 +842,7 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s08 (KERN_ATTR_BASIC ()) @@ -889,7 +889,7 @@ KERNEL_FQ void m08300_s08 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s16 (KERN_ATTR_BASIC ()) @@ -936,5 +936,5 @@ KERNEL_FQ void m08300_s16 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl index 4a7dbaeca..3bc0f21f0 100644 --- a/OpenCL/m08400_a3-optimized.cl +++ b/OpenCL/m08400_a3-optimized.cl @@ -525,7 +525,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ()) @@ -595,7 +595,7 @@ KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) @@ -735,7 +735,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ()) @@ -805,7 +805,7 @@ KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ()) @@ -875,5 +875,5 @@ KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl index 6477bb0a7..465b12481 100644 --- a/OpenCL/m08500_a3-pure.cl +++ b/OpenCL/m08500_a3-pure.cl @@ -729,7 +729,7 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_VECTOR ()) * main */ - m08500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ()) @@ -803,5 +803,5 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ()) * main */ - m08500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl index 4b9cfa0fa..9f24e4640 100644 --- a/OpenCL/m08600_a3-pure.cl +++ b/OpenCL/m08600_a3-pure.cl @@ -422,7 +422,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_VECTOR ()) * main */ - m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ()) @@ -479,5 +479,5 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ()) * main */ - m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl index 89e343646..c09fa2eae 100644 --- a/OpenCL/m08700_a3-optimized.cl +++ b/OpenCL/m08700_a3-optimized.cl @@ -644,7 +644,7 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ()) @@ -712,7 +712,7 @@ KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ()) @@ -780,7 +780,7 @@ KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ()) @@ -848,7 +848,7 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ()) @@ -916,7 +916,7 @@ KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ()) @@ -984,5 +984,5 @@ KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl index 0d1949aa8..18773c92d 100644 --- a/OpenCL/m08800-pure.cl +++ b/OpenCL/m08800-pure.cl @@ -415,7 +415,7 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t // MSDOS5.0 if ((r0 == 0x4f44534d) && (r1 == 0x302e3553)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } @@ -474,7 +474,7 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index cb0077e17..162b6b9a4 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -102,28 +102,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -132,56 +112,72 @@ DECLSPEC void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; + for (int i = 0; i < STATE_CNT4; i += 4) + { + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; + + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; + + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; + + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; + + TI[i + 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; + } + + #if SCRYPT_R > 1 - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; + } - for (int i = 0; i < SCRYPT_R; i++) + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) { - TI[i] = TO[i]; + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; } + + #endif } -DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -200,37 +196,37 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} + +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); - for (u32 i = 0; i < SCRYPT_N; i++) + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; + + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -238,6 +234,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -246,29 +244,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r (X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -339,44 +314,96 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #ifdef IS_CUDA + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } } -KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) +KERNEL_FQ void m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { + /** + * base + */ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; + // SCRYPT part, init V + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; +} - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif +KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -405,35 +432,48 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; - - tmp = tmps[gid].P[l + 0]; + uint4 X[4]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - tmp = tmps[gid].P[l + 1]; + uint4 T[4]; - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; - - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; + #ifdef IS_CUDA + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl index bd4f07ee0..84fbc8929 100644 --- a/OpenCL/m09700_a3-optimized.cl +++ b/OpenCL/m09700_a3-optimized.cl @@ -1014,7 +1014,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1063,7 +1063,7 @@ KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m16 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1116,7 +1116,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1165,7 +1165,7 @@ KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s16 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl index 763826790..015a579f3 100644 --- a/OpenCL/m09710_a3-optimized.cl +++ b/OpenCL/m09710_a3-optimized.cl @@ -423,7 +423,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -480,7 +480,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_s08 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09720_a3-optimized.cl b/OpenCL/m09720_a3-optimized.cl index 938c81532..50cb08962 100644 --- a/OpenCL/m09720_a3-optimized.cl +++ b/OpenCL/m09720_a3-optimized.cl @@ -603,7 +603,7 @@ KERNEL_FQ void m09720_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -650,7 +650,7 @@ KERNEL_FQ void m09720_m08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_m16 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -701,7 +701,7 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_s08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -748,7 +748,7 @@ KERNEL_FQ void m09720_s08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_s16 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl index 1aba657ed..4b57144ae 100644 --- a/OpenCL/m09800_a0-optimized.cl +++ b/OpenCL/m09800_a0-optimized.cl @@ -426,7 +426,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -725,7 +725,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl index b2e328aab..f37773596 100644 --- a/OpenCL/m09800_a1-optimized.cl +++ b/OpenCL/m09800_a1-optimized.cl @@ -474,7 +474,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -823,7 +823,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl index 18d8ac712..caebeeb4a 100644 --- a/OpenCL/m09800_a3-optimized.cl +++ b/OpenCL/m09800_a3-optimized.cl @@ -397,7 +397,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -664,7 +664,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -717,7 +717,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -766,7 +766,7 @@ KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -815,7 +815,7 @@ KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -864,7 +864,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -913,7 +913,7 @@ KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -962,5 +962,5 @@ KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl index b14397185..9287d419e 100644 --- a/OpenCL/m09810_a3-optimized.cl +++ b/OpenCL/m09810_a3-optimized.cl @@ -395,7 +395,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -444,7 +444,7 @@ KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -493,7 +493,7 @@ KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -542,7 +542,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -591,7 +591,7 @@ KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -640,5 +640,5 @@ KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09820_a0-optimized.cl b/OpenCL/m09820_a0-optimized.cl index 99ff2da70..3101d2ee6 100644 --- a/OpenCL/m09820_a0-optimized.cl +++ b/OpenCL/m09820_a0-optimized.cl @@ -370,7 +370,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -611,7 +611,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m09820_a1-optimized.cl b/OpenCL/m09820_a1-optimized.cl index 62b602cdf..578d971bc 100644 --- a/OpenCL/m09820_a1-optimized.cl +++ b/OpenCL/m09820_a1-optimized.cl @@ -418,7 +418,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -709,7 +709,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m09820_a3-optimized.cl b/OpenCL/m09820_a3-optimized.cl index 73453df76..0b0845d18 100644 --- a/OpenCL/m09820_a3-optimized.cl +++ b/OpenCL/m09820_a3-optimized.cl @@ -342,7 +342,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } @@ -549,7 +549,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -602,7 +602,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -651,7 +651,7 @@ KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -700,7 +700,7 @@ KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -749,7 +749,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -798,7 +798,7 @@ KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -847,5 +847,5 @@ KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09900_a3-optimized.cl b/OpenCL/m09900_a3-optimized.cl index d8ec338d2..a620418c9 100644 --- a/OpenCL/m09900_a3-optimized.cl +++ b/OpenCL/m09900_a3-optimized.cl @@ -626,7 +626,7 @@ KERNEL_FQ void m09900_m04 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_m08 (KERN_ATTR_VECTOR ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m09900_m08 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_m16 (KERN_ATTR_VECTOR ()) @@ -702,7 +702,7 @@ KERNEL_FQ void m09900_m16 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_s04 (KERN_ATTR_VECTOR ()) @@ -740,7 +740,7 @@ KERNEL_FQ void m09900_s04 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_s08 (KERN_ATTR_VECTOR ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m09900_s08 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_s16 (KERN_ATTR_VECTOR ()) @@ -816,5 +816,5 @@ KERNEL_FQ void m09900_s16 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10100_a3-optimized.cl b/OpenCL/m10100_a3-optimized.cl index b39c87657..08702bf00 100644 --- a/OpenCL/m10100_a3-optimized.cl +++ b/OpenCL/m10100_a3-optimized.cl @@ -264,7 +264,7 @@ KERNEL_FQ void m10100_m04 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_m08 (KERN_ATTR_VECTOR ()) @@ -302,7 +302,7 @@ KERNEL_FQ void m10100_m08 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_m16 (KERN_ATTR_VECTOR ()) @@ -340,7 +340,7 @@ KERNEL_FQ void m10100_m16 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s04 (KERN_ATTR_VECTOR ()) @@ -378,7 +378,7 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s08 (KERN_ATTR_VECTOR ()) @@ -416,7 +416,7 @@ KERNEL_FQ void m10100_s08 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s16 (KERN_ATTR_VECTOR ()) @@ -454,5 +454,5 @@ KERNEL_FQ void m10100_s16 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl index 5dea59403..509c1304c 100644 --- a/OpenCL/m10400_a3-optimized.cl +++ b/OpenCL/m10400_a3-optimized.cl @@ -546,7 +546,7 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t)) @@ -595,7 +595,7 @@ KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t)) @@ -644,7 +644,7 @@ KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) @@ -693,7 +693,7 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t)) @@ -742,7 +742,7 @@ KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t)) @@ -791,5 +791,5 @@ KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl index 09b756e7c..5c77f6db6 100644 --- a/OpenCL/m10410_a3-optimized.cl +++ b/OpenCL/m10410_a3-optimized.cl @@ -300,7 +300,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) @@ -349,7 +349,7 @@ KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) @@ -398,7 +398,7 @@ KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) @@ -447,7 +447,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) @@ -496,7 +496,7 @@ KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) @@ -545,5 +545,5 @@ KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) LOCAL_VK RC4_KEY rc4_keys[64]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10420_a3-optimized.cl b/OpenCL/m10420_a3-optimized.cl index 77db44040..c30cf75cf 100644 --- a/OpenCL/m10420_a3-optimized.cl +++ b/OpenCL/m10420_a3-optimized.cl @@ -409,7 +409,7 @@ KERNEL_FQ void m10420_m04 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_m08 (KERN_ATTR_ESALT (pdf_t)) @@ -456,7 +456,7 @@ KERNEL_FQ void m10420_m08 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_m16 (KERN_ATTR_ESALT (pdf_t)) @@ -503,7 +503,7 @@ KERNEL_FQ void m10420_m16 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) @@ -550,7 +550,7 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s08 (KERN_ATTR_ESALT (pdf_t)) @@ -597,7 +597,7 @@ KERNEL_FQ void m10420_s08 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s16 (KERN_ATTR_ESALT (pdf_t)) @@ -644,5 +644,5 @@ KERNEL_FQ void m10420_s16 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl index 901b04a91..94254a0d9 100644 --- a/OpenCL/m10700-pure.cl +++ b/OpenCL/m10700-pure.cl @@ -88,7 +88,7 @@ DECLSPEC u32 sha256_update_aes_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; ctx->len += len; @@ -261,7 +261,7 @@ DECLSPEC u32 sha384_update_aes_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2 { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; @@ -554,7 +554,7 @@ DECLSPEC u32 sha512_update_aes_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2 { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl index c88072358..922404d67 100644 --- a/OpenCL/m10800_a3-optimized.cl +++ b/OpenCL/m10800_a3-optimized.cl @@ -297,7 +297,7 @@ KERNEL_FQ void m10800_m04 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_m08 (KERN_ATTR_VECTOR ()) @@ -335,7 +335,7 @@ KERNEL_FQ void m10800_m08 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_m16 (KERN_ATTR_VECTOR ()) @@ -373,7 +373,7 @@ KERNEL_FQ void m10800_m16 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s04 (KERN_ATTR_VECTOR ()) @@ -411,7 +411,7 @@ KERNEL_FQ void m10800_s04 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s08 (KERN_ATTR_VECTOR ()) @@ -449,7 +449,7 @@ KERNEL_FQ void m10800_s08 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s16 (KERN_ATTR_VECTOR ()) @@ -487,5 +487,5 @@ KERNEL_FQ void m10800_s16 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10810_a0-optimized.cl b/OpenCL/m10810_a0-optimized.cl new file mode 100644 index 000000000..e5acbd5a4 --- /dev/null +++ b/OpenCL/m10810_a0-optimized.cl @@ -0,0 +1,483 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10810_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10810_a0-pure.cl b/OpenCL/m10810_a0-pure.cl new file mode 100644 index 000000000..70678f4fd --- /dev/null +++ b/OpenCL/m10810_a0-pure.cl @@ -0,0 +1,139 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10810_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10810_a1-optimized.cl b/OpenCL/m10810_a1-optimized.cl new file mode 100644 index 000000000..02a0dc838 --- /dev/null +++ b/OpenCL/m10810_a1-optimized.cl @@ -0,0 +1,601 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10810_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10810_a1-pure.cl b/OpenCL/m10810_a1-pure.cl new file mode 100644 index 000000000..d9ab6f013 --- /dev/null +++ b/OpenCL/m10810_a1-pure.cl @@ -0,0 +1,133 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10810_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10810_a3-optimized.cl b/OpenCL/m10810_a3-optimized.cl new file mode 100644 index 000000000..d00582895 --- /dev/null +++ b/OpenCL/m10810_a3-optimized.cl @@ -0,0 +1,542 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10810m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + + w[ 0] |= hc_swap32_S (salt_buf0[0]); + w[ 1] |= hc_swap32_S (salt_buf0[1]); + w[ 2] |= hc_swap32_S (salt_buf0[2]); + w[ 3] |= hc_swap32_S (salt_buf0[3]); + w[ 4] |= hc_swap32_S (salt_buf1[0]); + w[ 5] |= hc_swap32_S (salt_buf1[1]); + w[ 6] |= hc_swap32_S (salt_buf1[2]); + w[ 7] |= hc_swap32_S (salt_buf1[3]); + w[ 8] |= hc_swap32_S (salt_buf2[0]); + w[ 9] |= hc_swap32_S (salt_buf2[1]); + w[10] |= hc_swap32_S (salt_buf2[2]); + w[11] |= hc_swap32_S (salt_buf2[3]); + w[12] |= hc_swap32_S (salt_buf3[0]); + w[13] |= hc_swap32_S (salt_buf3[1]); + w[14] |= hc_swap32_S (salt_buf3[2]); + w[15] |= hc_swap32_S (salt_buf3[3]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + w[15] = pw_salt_len * 8; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10810s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10810_a3-pure.cl b/OpenCL/m10810_a3-pure.cl new file mode 100644 index 000000000..2987d6e9e --- /dev/null +++ b/OpenCL/m10810_a3-pure.cl @@ -0,0 +1,159 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10810_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_update_vector (&ctx, s, salt_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_update_vector (&ctx, s, salt_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a0-optimized.cl b/OpenCL/m10820_a0-optimized.cl new file mode 100644 index 000000000..0ff03a34b --- /dev/null +++ b/OpenCL/m10820_a0-optimized.cl @@ -0,0 +1,443 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10820_a0-pure.cl b/OpenCL/m10820_a0-pure.cl new file mode 100644 index 000000000..93d3647dc --- /dev/null +++ b/OpenCL/m10820_a0-pure.cl @@ -0,0 +1,125 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a1-optimized.cl b/OpenCL/m10820_a1-optimized.cl new file mode 100644 index 000000000..157ddb4ae --- /dev/null +++ b/OpenCL/m10820_a1-optimized.cl @@ -0,0 +1,557 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10820_a1-pure.cl b/OpenCL/m10820_a1-pure.cl new file mode 100644 index 000000000..6a57af75c --- /dev/null +++ b/OpenCL/m10820_a1-pure.cl @@ -0,0 +1,124 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a3-optimized.cl b/OpenCL/m10820_a3-optimized.cl new file mode 100644 index 000000000..076558f7f --- /dev/null +++ b/OpenCL/m10820_a3-optimized.cl @@ -0,0 +1,597 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10820m (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha512 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10820s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha512 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10820_a3-pure.cl b/OpenCL/m10820_a3-pure.cl new file mode 100644 index 000000000..310502a4e --- /dev/null +++ b/OpenCL/m10820_a3-pure.cl @@ -0,0 +1,149 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10830_a0-optimized.cl b/OpenCL/m10830_a0-optimized.cl new file mode 100644 index 000000000..d4aaafc21 --- /dev/null +++ b/OpenCL/m10830_a0-optimized.cl @@ -0,0 +1,493 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10830_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len2); + + const u32x pw_salt_len = out_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len2); + + const u32x pw_salt_len = out_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10830_a0-pure.cl b/OpenCL/m10830_a0-pure.cl new file mode 100644 index 000000000..ee34bce80 --- /dev/null +++ b/OpenCL/m10830_a0-pure.cl @@ -0,0 +1,139 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10830_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10830_a1-optimized.cl b/OpenCL/m10830_a1-optimized.cl new file mode 100644 index 000000000..02af3fd49 --- /dev/null +++ b/OpenCL/m10830_a1-optimized.cl @@ -0,0 +1,611 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10830_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10830_a1-pure.cl b/OpenCL/m10830_a1-pure.cl new file mode 100644 index 000000000..817c9502d --- /dev/null +++ b/OpenCL/m10830_a1-pure.cl @@ -0,0 +1,133 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10830_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10830_a3-optimized.cl b/OpenCL/m10830_a3-optimized.cl new file mode 100644 index 000000000..da9547a88 --- /dev/null +++ b/OpenCL/m10830_a3-optimized.cl @@ -0,0 +1,542 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10830m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + + w[ 0] |= hc_swap32_S (salt_buf0[0]); + w[ 1] |= hc_swap32_S (salt_buf0[1]); + w[ 2] |= hc_swap32_S (salt_buf0[2]); + w[ 3] |= hc_swap32_S (salt_buf0[3]); + w[ 4] |= hc_swap32_S (salt_buf1[0]); + w[ 5] |= hc_swap32_S (salt_buf1[1]); + w[ 6] |= hc_swap32_S (salt_buf1[2]); + w[ 7] |= hc_swap32_S (salt_buf1[3]); + w[ 8] |= hc_swap32_S (salt_buf2[0]); + w[ 9] |= hc_swap32_S (salt_buf2[1]); + w[10] |= hc_swap32_S (salt_buf2[2]); + w[11] |= hc_swap32_S (salt_buf2[3]); + w[12] |= hc_swap32_S (salt_buf3[0]); + w[13] |= hc_swap32_S (salt_buf3[1]); + w[14] |= hc_swap32_S (salt_buf3[2]); + w[15] |= hc_swap32_S (salt_buf3[3]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + w[15] = pw_salt_len * 8; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10830s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10830_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10830_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10830_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10830_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10830_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10830_a3-pure.cl b/OpenCL/m10830_a3-pure.cl new file mode 100644 index 000000000..c59d649f2 --- /dev/null +++ b/OpenCL/m10830_a3-pure.cl @@ -0,0 +1,159 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10830_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_update_vector (&ctx, s, salt_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_update_vector (&ctx, s, salt_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10840_a0-optimized.cl b/OpenCL/m10840_a0-optimized.cl new file mode 100644 index 000000000..c7da41511 --- /dev/null +++ b/OpenCL/m10840_a0-optimized.cl @@ -0,0 +1,453 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len2 + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len2 + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10840_a0-pure.cl b/OpenCL/m10840_a0-pure.cl new file mode 100644 index 000000000..1bdec9010 --- /dev/null +++ b/OpenCL/m10840_a0-pure.cl @@ -0,0 +1,125 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10840_a1-optimized.cl b/OpenCL/m10840_a1-optimized.cl new file mode 100644 index 000000000..5833df42a --- /dev/null +++ b/OpenCL/m10840_a1-optimized.cl @@ -0,0 +1,567 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10840_a1-pure.cl b/OpenCL/m10840_a1-pure.cl new file mode 100644 index 000000000..9e964a1e9 --- /dev/null +++ b/OpenCL/m10840_a1-pure.cl @@ -0,0 +1,115 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10840_a3-optimized.cl b/OpenCL/m10840_a3-optimized.cl new file mode 100644 index 000000000..965e2dea5 --- /dev/null +++ b/OpenCL/m10840_a3-optimized.cl @@ -0,0 +1,597 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10840m (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha512 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10840s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha512 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10840_a3-pure.cl b/OpenCL/m10840_a3-pure.cl new file mode 100644 index 000000000..b6cc88c68 --- /dev/null +++ b/OpenCL/m10840_a3-pure.cl @@ -0,0 +1,149 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m11000_a3-optimized.cl b/OpenCL/m11000_a3-optimized.cl index b621ce502..2f99175f4 100644 --- a/OpenCL/m11000_a3-optimized.cl +++ b/OpenCL/m11000_a3-optimized.cl @@ -600,7 +600,7 @@ KERNEL_FQ void m11000_m04 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_m08 (KERN_ATTR_BASIC ()) @@ -647,7 +647,7 @@ KERNEL_FQ void m11000_m08 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_m16 (KERN_ATTR_BASIC ()) @@ -694,7 +694,7 @@ KERNEL_FQ void m11000_m16 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) @@ -741,7 +741,7 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s08 (KERN_ATTR_BASIC ()) @@ -788,7 +788,7 @@ KERNEL_FQ void m11000_s08 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s16 (KERN_ATTR_BASIC ()) @@ -835,5 +835,5 @@ KERNEL_FQ void m11000_s16 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl index 8853241aa..45afc2489 100644 --- a/OpenCL/m11100_a3-optimized.cl +++ b/OpenCL/m11100_a3-optimized.cl @@ -718,7 +718,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ()) @@ -788,7 +788,7 @@ KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ()) @@ -858,7 +858,7 @@ KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) @@ -928,7 +928,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ()) @@ -998,7 +998,7 @@ KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ()) @@ -1068,5 +1068,5 @@ KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m11200_a3-optimized.cl b/OpenCL/m11200_a3-optimized.cl index 6b5c77f30..8399f53a9 100644 --- a/OpenCL/m11200_a3-optimized.cl +++ b/OpenCL/m11200_a3-optimized.cl @@ -934,7 +934,7 @@ KERNEL_FQ void m11200_m04 (KERN_ATTR_BASIC ()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_m08 (KERN_ATTR_BASIC ()) @@ -985,7 +985,7 @@ KERNEL_FQ void m11200_m08 (KERN_ATTR_BASIC ()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_m16 (KERN_ATTR_BASIC ()) @@ -1036,7 +1036,7 @@ KERNEL_FQ void m11200_m16 (KERN_ATTR_BASIC ()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) @@ -1087,7 +1087,7 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s08 (KERN_ATTR_BASIC ()) @@ -1138,7 +1138,7 @@ KERNEL_FQ void m11200_s08 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s16 (KERN_ATTR_BASIC ()) @@ -1189,5 +1189,5 @@ KERNEL_FQ void m11200_s16 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl index b3c1eccd8..9097b766d 100644 --- a/OpenCL/m11300-pure.cl +++ b/OpenCL/m11300-pure.cl @@ -351,7 +351,7 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ if (out[2] == pad && out[3] == pad) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl index 71dd72ac1..3c9c1a120 100644 --- a/OpenCL/m11500_a3-optimized.cl +++ b/OpenCL/m11500_a3-optimized.cl @@ -296,7 +296,7 @@ KERNEL_FQ void m11500_m04 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_m08 (KERN_ATTR_BASIC ()) @@ -334,7 +334,7 @@ KERNEL_FQ void m11500_m08 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_m16 (KERN_ATTR_BASIC ()) @@ -372,7 +372,7 @@ KERNEL_FQ void m11500_m16 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) @@ -410,7 +410,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s08 (KERN_ATTR_BASIC ()) @@ -448,7 +448,7 @@ KERNEL_FQ void m11500_s08 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s16 (KERN_ATTR_BASIC ()) @@ -486,5 +486,5 @@ KERNEL_FQ void m11500_s16 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11600-optimized.cl b/OpenCL/m11600-optimized.cl index 0f8f77a3e..05571d285 100644 --- a/OpenCL/m11600-optimized.cl +++ b/OpenCL/m11600-optimized.cl @@ -245,7 +245,7 @@ KERNEL_FQ void m11600_comp (KERN_ATTR_TMPS_HOOKS (seven_zip_tmp_t, seven_zip_hoo if (hooks[gid].hook_success == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 670caea19..469365ae6 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -346,7 +346,7 @@ KERNEL_FQ void m11600_comp (KERN_ATTR_TMPS_HOOKS (seven_zip_tmp_t, seven_zip_hoo if (hooks[gid].hook_success == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl index c704a0313..c7e5288f3 100644 --- a/OpenCL/m11700_a3-optimized.cl +++ b/OpenCL/m11700_a3-optimized.cl @@ -315,7 +315,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ()) @@ -443,7 +443,7 @@ KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ()) @@ -571,7 +571,7 @@ KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ()) @@ -635,5 +635,5 @@ KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl index 40799ff69..68259c3b5 100644 --- a/OpenCL/m11800_a3-optimized.cl +++ b/OpenCL/m11800_a3-optimized.cl @@ -315,7 +315,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ()) @@ -443,7 +443,7 @@ KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ()) @@ -571,7 +571,7 @@ KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ()) @@ -635,5 +635,5 @@ KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m12500-optimized.cl b/OpenCL/m12500-optimized.cl index cc26814e3..63ff55dd5 100644 --- a/OpenCL/m12500-optimized.cl +++ b/OpenCL/m12500-optimized.cl @@ -54,15 +54,20 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) if (gid >= gid_max) return; - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); u32 salt_buf[2]; @@ -81,7 +86,7 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) for (u32 i = 0, p = 0; i < 64; i++) { - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); } @@ -96,9 +101,9 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) p += 3; } - const u32 p2 = (pw_len * 2) + salt_len; + const u32 p2 = pw_len + salt_len; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; const u32 init_pos = loop_pos / (ROUNDS / 16); @@ -152,23 +157,15 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) u32 tmp0 = 0; u32 tmp1 = 0; - switch (k & 3) - { - case 0: tmp0 = iter_s >> 0; mask0 = 0x0000ffff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 1: tmp0 = iter_s >> 8; mask0 = 0xff0000ff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 2: tmp0 = iter_s >> 16; mask0 = 0xffff0000; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 3: tmp0 = iter_s >> 24; mask0 = 0xffffff00; - tmp1 = iter_s << 8; mask1 = 0x00ffffff; - break; - } + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } - switch (k / 4) + switch (kd) { case 0: w[ 0] = (w[ 0] & mask0) | tmp0; w[ 1] = (w[ 1] & mask1) | tmp1; @@ -303,11 +300,11 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) * base */ - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); const u32 salt_len = 8; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; u32 w0[4]; u32 w1[4]; @@ -372,13 +369,18 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) for (int i = 0; i < 16; i++) { - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; //const u32 pw_len = pws[gid].pw_len; @@ -389,7 +391,7 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) //const u32 salt_len = 8; - //const u32 p3 = (pw_len * 2) + salt_len + 3; + //const u32 p3 = pw_len + salt_len + 3; u32 w[16]; @@ -412,7 +414,7 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) u32 p = 0; - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); } diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index 30700d89c..ce1b2cb53 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -53,54 +53,22 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co switch (div) { - case 0: w0[0] |= tmp0; - w0[1] = tmp1; - break; - case 1: w0[1] |= tmp0; - w0[2] = tmp1; - break; - case 2: w0[2] |= tmp0; - w0[3] = tmp1; - break; - case 3: w0[3] |= tmp0; - w1[0] = tmp1; - break; - case 4: w1[0] |= tmp0; - w1[1] = tmp1; - break; - case 5: w1[1] |= tmp0; - w1[2] = tmp1; - break; - case 6: w1[2] |= tmp0; - w1[3] = tmp1; - break; - case 7: w1[3] |= tmp0; - w2[0] = tmp1; - break; - case 8: w2[0] |= tmp0; - w2[1] = tmp1; - break; - case 9: w2[1] |= tmp0; - w2[2] = tmp1; - break; - case 10: w2[2] |= tmp0; - w2[3] = tmp1; - break; - case 11: w2[3] |= tmp0; - w3[0] = tmp1; - break; - case 12: w3[0] |= tmp0; - w3[1] = tmp1; - break; - case 13: w3[1] |= tmp0; - w3[2] = tmp1; - break; - case 14: w3[2] |= tmp0; - w3[3] = tmp1; - break; - case 15: w3[3] |= tmp0; - carry = tmp1; - break; + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] |= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: } const u32 new_len = func_len + 3; @@ -482,7 +450,9 @@ DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (bytes == 0) return; + + const int pos = ctx->len & 63; int len = 64; @@ -615,7 +585,9 @@ DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) u32 w2[4]; u32 w3[4]; - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; int pos1 = 0; int pos4 = 0; @@ -769,38 +741,17 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS (rar3_tmp_t)) const u32 pw_len = pws[gid].pw_len; - // first set the utf16le pass: - u32 w[80] = { 0 }; - for (u32 i = 0, j = 0, k = 0; i < pw_len; i += 16, j += 4, k += 8) + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 a[4]; - - a[0] = pws[gid].i[j + 0]; - a[1] = pws[gid].i[j + 1]; - a[2] = pws[gid].i[j + 2]; - a[3] = pws[gid].i[j + 3]; - - u32 b[4]; - u32 c[4]; - - make_utf16le (a, b, c); - - w[k + 0] = hc_swap32_S (b[0]); - w[k + 1] = hc_swap32_S (b[1]); - w[k + 2] = hc_swap32_S (b[2]); - w[k + 3] = hc_swap32_S (b[3]); - w[k + 4] = hc_swap32_S (c[0]); - w[k + 5] = hc_swap32_S (c[1]); - w[k + 6] = hc_swap32_S (c[2]); - w[k + 7] = hc_swap32_S (c[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // append salt: - const u32 salt_idx = (pw_len * 2) / 4; - const u32 salt_off = (pw_len * 2) & 3; + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; u32 salt_buf[3]; @@ -817,10 +768,9 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS (rar3_tmp_t)) salt_buf[0] = (salt_buf[0] >> 16); } - w[salt_idx] |= salt_buf[0]; - - w[salt_idx + 1] = salt_buf[1]; - w[salt_idx + 2] = salt_buf[2]; + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; // store initial w[] (pass and salt) in tmps: @@ -847,17 +797,17 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; - u32 w[80] = { 0 }; // 64 byte aligned + u32 w[80] = { 0 }; - for (u32 i = 0; i < 66; i++) // unroll ? + for (u32 i = 0; i < 66; i++) { w[i] = tmps[gid].w[i]; } @@ -884,7 +834,6 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) ctx_iv.len += 3; - // copy the context from ctx_iv to ctx: sha1_ctx_t ctx; @@ -945,7 +894,7 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) // only needed if pw_len > 28: - for (u32 i = 0; i < 66; i++) // unroll ? + for (u32 i = 0; i < 66; i++) { tmps[gid].w[i] = w[i]; } @@ -1014,11 +963,11 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl index 5fb0e3985..f905d0e06 100644 --- a/OpenCL/m12600_a3-optimized.cl +++ b/OpenCL/m12600_a3-optimized.cl @@ -691,7 +691,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ()) @@ -761,7 +761,7 @@ KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) @@ -901,7 +901,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ()) @@ -971,7 +971,7 @@ KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ()) @@ -1041,5 +1041,5 @@ KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl index 2906291a5..8bc4c9554 100644 --- a/OpenCL/m12800-pure.cl +++ b/OpenCL/m12800-pure.cl @@ -154,6 +154,7 @@ KERNEL_FQ void m12800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh #undef uint_to_hex_lower8 + // naive convert is fine here make_utf16le_S (w1, w2, w3); make_utf16le_S (w0, w0, w1); diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index 7aa5723aa..78ec57ad9 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -657,7 +657,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -755,7 +755,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index c0cf5a92b..85fb0f79c 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -441,7 +441,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -503,7 +503,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index fb7c16f54..d9339b1df 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -704,7 +704,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -851,7 +851,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index 0e43f21c3..1156f04a3 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -437,7 +437,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -497,7 +497,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index 924e44beb..61617aa61 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -624,7 +624,7 @@ DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -681,7 +681,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -733,7 +733,7 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m16 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -789,7 +789,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -841,7 +841,7 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s16 (KERN_ATTR_ESALT (krb5tgs_t)) diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 99eb540a4..0ef44c00d 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -450,7 +450,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -523,7 +523,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl index 4c0429984..9fdf908d0 100644 --- a/OpenCL/m13200-pure.cl +++ b/OpenCL/m13200-pure.cl @@ -227,7 +227,7 @@ KERNEL_FQ void m13200_comp (KERN_ATTR_TMPS (axcrypt_tmp_t)) if (tmps[gid].cipher[0] == 0xa6a6a6a6 && tmps[gid].cipher[1] == 0xa6a6a6a6) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m13300_a3-optimized.cl b/OpenCL/m13300_a3-optimized.cl index 2c120924e..87d6fcd83 100644 --- a/OpenCL/m13300_a3-optimized.cl +++ b/OpenCL/m13300_a3-optimized.cl @@ -563,7 +563,7 @@ KERNEL_FQ void m13300_m04 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_m08 (KERN_ATTR_VECTOR ()) @@ -601,7 +601,7 @@ KERNEL_FQ void m13300_m08 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_m16 (KERN_ATTR_VECTOR ()) @@ -639,7 +639,7 @@ KERNEL_FQ void m13300_m16 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s04 (KERN_ATTR_VECTOR ()) @@ -677,7 +677,7 @@ KERNEL_FQ void m13300_s04 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s08 (KERN_ATTR_VECTOR ()) @@ -715,7 +715,7 @@ KERNEL_FQ void m13300_s08 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s16 (KERN_ATTR_VECTOR ()) @@ -753,5 +753,5 @@ KERNEL_FQ void m13300_s16 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13500_a3-optimized.cl b/OpenCL/m13500_a3-optimized.cl index 07a068d63..ae2bc1d9a 100644 --- a/OpenCL/m13500_a3-optimized.cl +++ b/OpenCL/m13500_a3-optimized.cl @@ -867,7 +867,7 @@ KERNEL_FQ void m13500_m04 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_m08 (KERN_ATTR_ESALT (pstoken_t)) @@ -914,7 +914,7 @@ KERNEL_FQ void m13500_m08 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_m16 (KERN_ATTR_ESALT (pstoken_t)) @@ -961,7 +961,7 @@ KERNEL_FQ void m13500_m16 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) @@ -1008,7 +1008,7 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s08 (KERN_ATTR_ESALT (pstoken_t)) @@ -1055,7 +1055,7 @@ KERNEL_FQ void m13500_s08 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s16 (KERN_ATTR_ESALT (pstoken_t)) @@ -1102,5 +1102,5 @@ KERNEL_FQ void m13500_s16 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl index e39c71e54..0602b1f7d 100644 --- a/OpenCL/m13711-pure.cl +++ b/OpenCL/m13711-pure.cl @@ -512,7 +512,7 @@ KERNEL_FQ void m13711_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -521,7 +521,7 @@ KERNEL_FQ void m13711_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl index 3caeedaa1..e8e0dc840 100644 --- a/OpenCL/m13712-pure.cl +++ b/OpenCL/m13712-pure.cl @@ -568,7 +568,7 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -577,7 +577,7 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -585,7 +585,7 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl index f748f8b74..493fd68f1 100644 --- a/OpenCL/m13713-pure.cl +++ b/OpenCL/m13713-pure.cl @@ -638,7 +638,7 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -647,7 +647,7 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -655,7 +655,7 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -663,7 +663,7 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl index 4201b1ffa..4e9c3edaf 100644 --- a/OpenCL/m13721-pure.cl +++ b/OpenCL/m13721-pure.cl @@ -659,7 +659,7 @@ KERNEL_FQ void m13721_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -668,7 +668,7 @@ KERNEL_FQ void m13721_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl index b887da37e..f7cbbb83e 100644 --- a/OpenCL/m13722-pure.cl +++ b/OpenCL/m13722-pure.cl @@ -715,7 +715,7 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -724,7 +724,7 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -732,7 +732,7 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl index 5e2a429c3..382d67be9 100644 --- a/OpenCL/m13723-pure.cl +++ b/OpenCL/m13723-pure.cl @@ -785,7 +785,7 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -794,7 +794,7 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -802,7 +802,7 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -810,7 +810,7 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl index 2a28b4e76..52e4b28e1 100644 --- a/OpenCL/m13731-pure.cl +++ b/OpenCL/m13731-pure.cl @@ -807,7 +807,7 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -816,7 +816,7 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl index dad2cc3ab..faeb8911b 100644 --- a/OpenCL/m13732-pure.cl +++ b/OpenCL/m13732-pure.cl @@ -863,7 +863,7 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -872,7 +872,7 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -880,7 +880,7 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl index 0d181be96..9d8e38195 100644 --- a/OpenCL/m13733-pure.cl +++ b/OpenCL/m13733-pure.cl @@ -933,7 +933,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -942,7 +942,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -950,7 +950,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -958,7 +958,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl index 22ce6b973..a609660be 100644 --- a/OpenCL/m13751-pure.cl +++ b/OpenCL/m13751-pure.cl @@ -600,7 +600,7 @@ KERNEL_FQ void m13751_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -609,7 +609,7 @@ KERNEL_FQ void m13751_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl index 5b3ec6ea9..ff3c0bc95 100644 --- a/OpenCL/m13752-pure.cl +++ b/OpenCL/m13752-pure.cl @@ -627,7 +627,7 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -636,7 +636,7 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -644,7 +644,7 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl index 1a81e4960..65956c669 100644 --- a/OpenCL/m13753-pure.cl +++ b/OpenCL/m13753-pure.cl @@ -697,7 +697,7 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -706,7 +706,7 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -714,7 +714,7 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -722,7 +722,7 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl index 3f0d63249..8e66df13b 100644 --- a/OpenCL/m13771-pure.cl +++ b/OpenCL/m13771-pure.cl @@ -706,7 +706,7 @@ KERNEL_FQ void m13771_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -715,7 +715,7 @@ KERNEL_FQ void m13771_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl index 431841a39..14c853ecc 100644 --- a/OpenCL/m13772-pure.cl +++ b/OpenCL/m13772-pure.cl @@ -762,7 +762,7 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -771,7 +771,7 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -779,7 +779,7 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl index 802b7b06f..16aee20ef 100644 --- a/OpenCL/m13773-pure.cl +++ b/OpenCL/m13773-pure.cl @@ -832,7 +832,7 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -841,7 +841,7 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -849,7 +849,7 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } @@ -857,7 +857,7 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index b1a1bffd3..9ad06a344 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -762,7 +762,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -819,7 +819,7 @@ KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -876,7 +876,7 @@ KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -933,7 +933,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -990,7 +990,7 @@ KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -1047,5 +1047,5 @@ KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl index 8905e51f6..87121e5cd 100644 --- a/OpenCL/m13900_a3-optimized.cl +++ b/OpenCL/m13900_a3-optimized.cl @@ -482,7 +482,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ()) * main */ - m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ()) @@ -552,7 +552,7 @@ KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ()) * main */ - m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ()) @@ -622,7 +622,7 @@ KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ()) * main */ - m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) @@ -692,7 +692,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ()) @@ -762,7 +762,7 @@ KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ()) @@ -832,5 +832,5 @@ KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl index 687c6fb2a..f44e0bbfc 100644 --- a/OpenCL/m14000_a3-pure.cl +++ b/OpenCL/m14000_a3-pure.cl @@ -1744,13 +1744,13 @@ KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) #endif for (int i = 0, j = 0; i < 32; i += 8, j += 7) { - atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); } } diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl index bf2a438f4..c85d58cd5 100644 --- a/OpenCL/m14100_a3-pure.cl +++ b/OpenCL/m14100_a3-pure.cl @@ -807,7 +807,7 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ()) * main */ - m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) @@ -881,5 +881,5 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) * main */ - m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl index 8c698a798..39dcc3dd8 100644 --- a/OpenCL/m14400_a3-optimized.cl +++ b/OpenCL/m14400_a3-optimized.cl @@ -695,7 +695,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ()) * main */ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ()) * main */ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ()) * main */ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ()) @@ -975,7 +975,7 @@ KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ()) @@ -1045,5 +1045,5 @@ KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m14511_a0-pure.cl b/OpenCL/m14511_a0-pure.cl new file mode 100644 index 000000000..52189158e --- /dev/null +++ b/OpenCL/m14511_a0-pure.cl @@ -0,0 +1,394 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14511_a1-pure.cl b/OpenCL/m14511_a1-pure.cl new file mode 100644 index 000000000..33e337030 --- /dev/null +++ b/OpenCL/m14511_a1-pure.cl @@ -0,0 +1,411 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14511_a3-pure.cl b/OpenCL/m14511_a3-pure.cl new file mode 100644 index 000000000..3e0afc6cf --- /dev/null +++ b/OpenCL/m14511_a3-pure.cl @@ -0,0 +1,397 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a0-pure.cl b/OpenCL/m14512_a0-pure.cl new file mode 100644 index 000000000..8e42b89fb --- /dev/null +++ b/OpenCL/m14512_a0-pure.cl @@ -0,0 +1,321 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a1-pure.cl b/OpenCL/m14512_a1-pure.cl new file mode 100644 index 000000000..161741e96 --- /dev/null +++ b/OpenCL/m14512_a1-pure.cl @@ -0,0 +1,339 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a3-pure.cl b/OpenCL/m14512_a3-pure.cl new file mode 100644 index 000000000..47193000b --- /dev/null +++ b/OpenCL/m14512_a3-pure.cl @@ -0,0 +1,325 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a0-pure.cl b/OpenCL/m14513_a0-pure.cl new file mode 100644 index 000000000..2e7c9919b --- /dev/null +++ b/OpenCL/m14513_a0-pure.cl @@ -0,0 +1,323 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a1-pure.cl b/OpenCL/m14513_a1-pure.cl new file mode 100644 index 000000000..8bf808a3c --- /dev/null +++ b/OpenCL/m14513_a1-pure.cl @@ -0,0 +1,341 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a3-pure.cl b/OpenCL/m14513_a3-pure.cl new file mode 100644 index 000000000..0cb812e59 --- /dev/null +++ b/OpenCL/m14513_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a0-pure.cl b/OpenCL/m14521_a0-pure.cl new file mode 100644 index 000000000..8607120f1 --- /dev/null +++ b/OpenCL/m14521_a0-pure.cl @@ -0,0 +1,356 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a1-pure.cl b/OpenCL/m14521_a1-pure.cl new file mode 100644 index 000000000..eaf9af2a5 --- /dev/null +++ b/OpenCL/m14521_a1-pure.cl @@ -0,0 +1,345 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k4 = ctx.h[6]; + k5 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a3-pure.cl b/OpenCL/m14521_a3-pure.cl new file mode 100644 index 000000000..17d48d002 --- /dev/null +++ b/OpenCL/m14521_a3-pure.cl @@ -0,0 +1,371 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap(&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a0-pure.cl b/OpenCL/m14522_a0-pure.cl new file mode 100644 index 000000000..de12209b9 --- /dev/null +++ b/OpenCL/m14522_a0-pure.cl @@ -0,0 +1,283 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a1-pure.cl b/OpenCL/m14522_a1-pure.cl new file mode 100644 index 000000000..f94d62ffc --- /dev/null +++ b/OpenCL/m14522_a1-pure.cl @@ -0,0 +1,273 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a3-pure.cl b/OpenCL/m14522_a3-pure.cl new file mode 100644 index 000000000..d77dde0eb --- /dev/null +++ b/OpenCL/m14522_a3-pure.cl @@ -0,0 +1,299 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a0-pure.cl b/OpenCL/m14523_a0-pure.cl new file mode 100644 index 000000000..5ded9cfbf --- /dev/null +++ b/OpenCL/m14523_a0-pure.cl @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a1-pure.cl b/OpenCL/m14523_a1-pure.cl new file mode 100644 index 000000000..00daf2c86 --- /dev/null +++ b/OpenCL/m14523_a1-pure.cl @@ -0,0 +1,275 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a3-pure.cl b/OpenCL/m14523_a3-pure.cl new file mode 100644 index 000000000..bc4a95e24 --- /dev/null +++ b/OpenCL/m14523_a3-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a0-pure.cl b/OpenCL/m14531_a0-pure.cl new file mode 100644 index 000000000..c5c6ef740 --- /dev/null +++ b/OpenCL/m14531_a0-pure.cl @@ -0,0 +1,355 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a1-pure.cl b/OpenCL/m14531_a1-pure.cl new file mode 100644 index 000000000..8190a9d56 --- /dev/null +++ b/OpenCL/m14531_a1-pure.cl @@ -0,0 +1,345 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a3-pure.cl b/OpenCL/m14531_a3-pure.cl new file mode 100644 index 000000000..aa8a41d7a --- /dev/null +++ b/OpenCL/m14531_a3-pure.cl @@ -0,0 +1,371 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap(&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a0-pure.cl b/OpenCL/m14532_a0-pure.cl new file mode 100644 index 000000000..e643fc81c --- /dev/null +++ b/OpenCL/m14532_a0-pure.cl @@ -0,0 +1,282 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a1-pure.cl b/OpenCL/m14532_a1-pure.cl new file mode 100644 index 000000000..7d3b08b9a --- /dev/null +++ b/OpenCL/m14532_a1-pure.cl @@ -0,0 +1,273 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a3-pure.cl b/OpenCL/m14532_a3-pure.cl new file mode 100644 index 000000000..ab3b0645b --- /dev/null +++ b/OpenCL/m14532_a3-pure.cl @@ -0,0 +1,299 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a0-pure.cl b/OpenCL/m14533_a0-pure.cl new file mode 100644 index 000000000..353232f82 --- /dev/null +++ b/OpenCL/m14533_a0-pure.cl @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a1-pure.cl b/OpenCL/m14533_a1-pure.cl new file mode 100644 index 000000000..ce4c4fae1 --- /dev/null +++ b/OpenCL/m14533_a1-pure.cl @@ -0,0 +1,275 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a3-pure.cl b/OpenCL/m14533_a3-pure.cl new file mode 100644 index 000000000..41883f4b9 --- /dev/null +++ b/OpenCL/m14533_a3-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a0-pure.cl b/OpenCL/m14541_a0-pure.cl new file mode 100644 index 000000000..2ed23a0ba --- /dev/null +++ b/OpenCL/m14541_a0-pure.cl @@ -0,0 +1,394 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a1-pure.cl b/OpenCL/m14541_a1-pure.cl new file mode 100644 index 000000000..f18137677 --- /dev/null +++ b/OpenCL/m14541_a1-pure.cl @@ -0,0 +1,411 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = hc_swap32_S (ctx.h[0]); + const u32 k1 = hc_swap32_S (ctx.h[1]); + const u32 k2 = hc_swap32_S (ctx.h[2]); + const u32 k3 = hc_swap32_S (ctx.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx.h[4]); + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = hc_swap32_S (ctx0_tmp.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx0_tmp.h[1]); + k7 = hc_swap32_S (ctx0_tmp.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = hc_swap32_S (ctx.h[0]); + const u32 k1 = hc_swap32_S (ctx.h[1]); + const u32 k2 = hc_swap32_S (ctx.h[2]); + const u32 k3 = hc_swap32_S (ctx.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx.h[4]); + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = hc_swap32_S (ctx0_tmp.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx0_tmp.h[1]); + k7 = hc_swap32_S (ctx0_tmp.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a3-pure.cl b/OpenCL/m14541_a3-pure.cl new file mode 100644 index 000000000..112b9d452 --- /dev/null +++ b/OpenCL/m14541_a3-pure.cl @@ -0,0 +1,397 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a0-pure.cl b/OpenCL/m14542_a0-pure.cl new file mode 100644 index 000000000..bd1038fb9 --- /dev/null +++ b/OpenCL/m14542_a0-pure.cl @@ -0,0 +1,321 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a1-pure.cl b/OpenCL/m14542_a1-pure.cl new file mode 100644 index 000000000..06ade940a --- /dev/null +++ b/OpenCL/m14542_a1-pure.cl @@ -0,0 +1,339 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a3-pure.cl b/OpenCL/m14542_a3-pure.cl new file mode 100644 index 000000000..5e37f44b1 --- /dev/null +++ b/OpenCL/m14542_a3-pure.cl @@ -0,0 +1,325 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a0-pure.cl b/OpenCL/m14543_a0-pure.cl new file mode 100644 index 000000000..13b0910c8 --- /dev/null +++ b/OpenCL/m14543_a0-pure.cl @@ -0,0 +1,323 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish setkey and encrypt + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a1-pure.cl b/OpenCL/m14543_a1-pure.cl new file mode 100644 index 000000000..3db0f2791 --- /dev/null +++ b/OpenCL/m14543_a1-pure.cl @@ -0,0 +1,341 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a3-pure.cl b/OpenCL/m14543_a3-pure.cl new file mode 100644 index 000000000..e8f8977fa --- /dev/null +++ b/OpenCL/m14543_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a0-pure.cl b/OpenCL/m14551_a0-pure.cl new file mode 100644 index 000000000..a726661eb --- /dev/null +++ b/OpenCL/m14551_a0-pure.cl @@ -0,0 +1,410 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a1-pure.cl b/OpenCL/m14551_a1-pure.cl new file mode 100644 index 000000000..eecb18f89 --- /dev/null +++ b/OpenCL/m14551_a1-pure.cl @@ -0,0 +1,399 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a3-pure.cl b/OpenCL/m14551_a3-pure.cl new file mode 100644 index 000000000..b6a048453 --- /dev/null +++ b/OpenCL/m14551_a3-pure.cl @@ -0,0 +1,425 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap(&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a0-pure.cl b/OpenCL/m14552_a0-pure.cl new file mode 100644 index 000000000..2b8456281 --- /dev/null +++ b/OpenCL/m14552_a0-pure.cl @@ -0,0 +1,373 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a1-pure.cl b/OpenCL/m14552_a1-pure.cl new file mode 100644 index 000000000..0f6968ba6 --- /dev/null +++ b/OpenCL/m14552_a1-pure.cl @@ -0,0 +1,363 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a3-pure.cl b/OpenCL/m14552_a3-pure.cl new file mode 100644 index 000000000..906d6ae09 --- /dev/null +++ b/OpenCL/m14552_a3-pure.cl @@ -0,0 +1,389 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a0-pure.cl b/OpenCL/m14553_a0-pure.cl new file mode 100644 index 000000000..98676a446 --- /dev/null +++ b/OpenCL/m14553_a0-pure.cl @@ -0,0 +1,375 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a1-pure.cl b/OpenCL/m14553_a1-pure.cl new file mode 100644 index 000000000..d5e45a940 --- /dev/null +++ b/OpenCL/m14553_a1-pure.cl @@ -0,0 +1,365 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a3-pure.cl b/OpenCL/m14553_a3-pure.cl new file mode 100644 index 000000000..f1b2eaccf --- /dev/null +++ b/OpenCL/m14553_a3-pure.cl @@ -0,0 +1,391 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl index 1432c0d0f..9827e2c4f 100644 --- a/OpenCL/m14611-pure.cl +++ b/OpenCL/m14611-pure.cl @@ -365,7 +365,7 @@ KERNEL_FQ void m14611_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14612-pure.cl b/OpenCL/m14612-pure.cl index d5f54a838..eb92f69d7 100644 --- a/OpenCL/m14612-pure.cl +++ b/OpenCL/m14612-pure.cl @@ -312,7 +312,7 @@ KERNEL_FQ void m14612_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14613-pure.cl b/OpenCL/m14613-pure.cl index fa63df342..db6fbb952 100644 --- a/OpenCL/m14613-pure.cl +++ b/OpenCL/m14613-pure.cl @@ -312,7 +312,7 @@ KERNEL_FQ void m14613_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl index 78d089f36..8a998bb05 100644 --- a/OpenCL/m14621-pure.cl +++ b/OpenCL/m14621-pure.cl @@ -404,7 +404,7 @@ KERNEL_FQ void m14621_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14622-pure.cl b/OpenCL/m14622-pure.cl index 8746eec97..5215be024 100644 --- a/OpenCL/m14622-pure.cl +++ b/OpenCL/m14622-pure.cl @@ -351,7 +351,7 @@ KERNEL_FQ void m14622_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14623-pure.cl b/OpenCL/m14623-pure.cl index 4e978f31f..e3b2b3e63 100644 --- a/OpenCL/m14623-pure.cl +++ b/OpenCL/m14623-pure.cl @@ -351,7 +351,7 @@ KERNEL_FQ void m14623_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl index 8f4cf4312..a2bec808f 100644 --- a/OpenCL/m14631-pure.cl +++ b/OpenCL/m14631-pure.cl @@ -460,7 +460,7 @@ KERNEL_FQ void m14631_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14632-pure.cl b/OpenCL/m14632-pure.cl index ad66382bb..f76b3a40e 100644 --- a/OpenCL/m14632-pure.cl +++ b/OpenCL/m14632-pure.cl @@ -407,7 +407,7 @@ KERNEL_FQ void m14632_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14633-pure.cl b/OpenCL/m14633-pure.cl index 8cec7bf47..5a72fdbe5 100644 --- a/OpenCL/m14633-pure.cl +++ b/OpenCL/m14633-pure.cl @@ -407,7 +407,7 @@ KERNEL_FQ void m14633_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl index c955a4529..87572c0fc 100644 --- a/OpenCL/m14641-pure.cl +++ b/OpenCL/m14641-pure.cl @@ -365,7 +365,7 @@ KERNEL_FQ void m14641_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14642-pure.cl b/OpenCL/m14642-pure.cl index 1fb2acc13..0988bee8f 100644 --- a/OpenCL/m14642-pure.cl +++ b/OpenCL/m14642-pure.cl @@ -312,7 +312,7 @@ KERNEL_FQ void m14642_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14643-pure.cl b/OpenCL/m14643-pure.cl index 27c7461df..246934d95 100644 --- a/OpenCL/m14643-pure.cl +++ b/OpenCL/m14643-pure.cl @@ -312,7 +312,7 @@ KERNEL_FQ void m14643_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl index e4f47da8a..a716d4f5d 100644 --- a/OpenCL/m14700-pure.cl +++ b/OpenCL/m14700-pure.cl @@ -379,7 +379,7 @@ KERNEL_FQ void m14700_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back if ((cipher[0] == 0xa6a6a6a6) && (cipher[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl index c76a4e1a8..dcf2c956f 100644 --- a/OpenCL/m14800-pure.cl +++ b/OpenCL/m14800-pure.cl @@ -643,7 +643,7 @@ KERNEL_FQ void m14800_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba if ((cipher[0] == 0xa6a6a6a6) && (cipher[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl index 47ce9452a..caf6a2ee0 100644 --- a/OpenCL/m14900_a3-optimized.cl +++ b/OpenCL/m14900_a3-optimized.cl @@ -274,7 +274,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ()) * main */ - m14900m (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m14900m (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14900_m08 (KERN_ATTR_BASIC ()) @@ -348,7 +348,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ()) * main */ - m14900s (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m14900s (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14900_s08 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m15000_a3-optimized.cl b/OpenCL/m15000_a3-optimized.cl index c0dbea76c..8c1d20a77 100644 --- a/OpenCL/m15000_a3-optimized.cl +++ b/OpenCL/m15000_a3-optimized.cl @@ -478,7 +478,7 @@ KERNEL_FQ void m15000_m04 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_m08 (KERN_ATTR_VECTOR ()) @@ -516,7 +516,7 @@ KERNEL_FQ void m15000_m08 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_m16 (KERN_ATTR_VECTOR ()) @@ -554,7 +554,7 @@ KERNEL_FQ void m15000_m16 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s04 (KERN_ATTR_VECTOR ()) @@ -592,7 +592,7 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s08 (KERN_ATTR_VECTOR ()) @@ -630,7 +630,7 @@ KERNEL_FQ void m15000_s08 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s16 (KERN_ATTR_VECTOR ()) @@ -668,5 +668,5 @@ KERNEL_FQ void m15000_s16 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl index 77fafe668..5e59c5deb 100644 --- a/OpenCL/m15300-pure.cl +++ b/OpenCL/m15300-pure.cl @@ -676,7 +676,7 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) && (expected_key[2] == hc_swap32_S (ctx.opad.h[2])) && (expected_key[3] == hc_swap32_S (ctx.opad.h[3]))) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m15500_a3-optimized.cl b/OpenCL/m15500_a3-optimized.cl index 7afcfbace..1882bbf8f 100644 --- a/OpenCL/m15500_a3-optimized.cl +++ b/OpenCL/m15500_a3-optimized.cl @@ -621,7 +621,7 @@ KERNEL_FQ void m15500_m04 (KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_m08 (KERN_ATTR_VECTOR ()) @@ -659,7 +659,7 @@ KERNEL_FQ void m15500_m08 (KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_m16 (KERN_ATTR_VECTOR ()) @@ -697,7 +697,7 @@ KERNEL_FQ void m15500_m16 (KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s04 (KERN_ATTR_VECTOR ()) @@ -735,7 +735,7 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s08 (KERN_ATTR_VECTOR ()) @@ -773,7 +773,7 @@ KERNEL_FQ void m15500_s08 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s16 (KERN_ATTR_VECTOR ()) @@ -811,5 +811,5 @@ KERNEL_FQ void m15500_s16 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index c3e32fae9..4e46bb4e4 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -24,6 +24,13 @@ typedef struct } scrypt_tmp_t; +typedef struct ethereum_scrypt +{ + u32 salt_buf[16]; + u32 ciphertext[8]; + +} ethereum_scrypt_t; + #ifdef IS_CUDA inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } @@ -41,13 +48,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n) #endif -typedef struct ethereum_scrypt -{ - u32 salt_buf[16]; - u32 ciphertext[8]; - -} ethereum_scrypt_t; - DECLSPEC uint4 hc_swap32_4 (uint4 v) { return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u)); @@ -109,28 +109,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -139,56 +119,72 @@ DECLSPEC void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; + for (int i = 0; i < STATE_CNT4; i += 4) + { + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; + + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; + + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; + + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; + + TI[i + 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; + } + + #if SCRYPT_R > 1 - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; + } - for (int i = 0; i < SCRYPT_R; i++) + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) { - TI[i] = TO[i]; + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; } + + #endif } -DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -207,37 +203,37 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} + +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); + + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; - for (u32 i = 0; i < SCRYPT_N; i++) + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -245,6 +241,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -253,29 +251,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r (X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } #ifndef KECCAK_ROUNDS @@ -475,44 +450,96 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #ifdef IS_CUDA + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } } -KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t)) +KERNEL_FQ void m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { + /** + * base + */ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; + // SCRYPT part, init V + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; +} - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif +KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t)) @@ -541,35 +568,48 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; - - tmp = tmps[gid].P[l + 0]; + uint4 X[4]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - tmp = tmps[gid].P[l + 1]; + uint4 T[4]; - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; - - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; + #ifdef IS_CUDA + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl index f02201d88..c44a61149 100644 --- a/OpenCL/m15900-pure.cl +++ b/OpenCL/m15900-pure.cl @@ -859,7 +859,7 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) && (expected_key[2] == h32_from_64_S (ctx.opad.h[1])) && (expected_key[3] == l32_from_64_S (ctx.opad.h[1]))) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16100_a0-optimized.cl b/OpenCL/m16100_a0-optimized.cl index c52d9a7cc..6fd679154 100644 --- a/OpenCL/m16100_a0-optimized.cl +++ b/OpenCL/m16100_a0-optimized.cl @@ -258,7 +258,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -276,7 +276,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -295,7 +295,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -543,7 +543,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -561,7 +561,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -580,7 +580,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16100_a0-pure.cl b/OpenCL/m16100_a0-pure.cl index 95383651a..1bb0cda09 100644 --- a/OpenCL/m16100_a0-pure.cl +++ b/OpenCL/m16100_a0-pure.cl @@ -143,7 +143,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -161,7 +161,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -180,7 +180,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -305,7 +305,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -323,7 +323,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -342,7 +342,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16100_a1-optimized.cl b/OpenCL/m16100_a1-optimized.cl index 5978e8881..826992386 100644 --- a/OpenCL/m16100_a1-optimized.cl +++ b/OpenCL/m16100_a1-optimized.cl @@ -317,7 +317,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -335,7 +335,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -354,7 +354,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -663,7 +663,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -681,7 +681,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -700,7 +700,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16100_a1-pure.cl b/OpenCL/m16100_a1-pure.cl index 92d72903b..90415e54d 100644 --- a/OpenCL/m16100_a1-pure.cl +++ b/OpenCL/m16100_a1-pure.cl @@ -136,7 +136,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -154,7 +154,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -173,7 +173,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -294,7 +294,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -312,7 +312,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -331,7 +331,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16100_a3-optimized.cl b/OpenCL/m16100_a3-optimized.cl index 4da7aebc1..fb65e0f52 100644 --- a/OpenCL/m16100_a3-optimized.cl +++ b/OpenCL/m16100_a3-optimized.cl @@ -232,7 +232,7 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -250,7 +250,7 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -269,7 +269,7 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -485,7 +485,7 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -503,7 +503,7 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -522,7 +522,7 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -575,7 +575,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_m08 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -622,7 +622,7 @@ KERNEL_FQ void m16100_m08 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_m16 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -669,7 +669,7 @@ KERNEL_FQ void m16100_m16 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -716,7 +716,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s08 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -763,7 +763,7 @@ KERNEL_FQ void m16100_s08 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s16 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -810,5 +810,5 @@ KERNEL_FQ void m16100_s16 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16100_a3-pure.cl b/OpenCL/m16100_a3-pure.cl index 0829b7eb8..634f05528 100644 --- a/OpenCL/m16100_a3-pure.cl +++ b/OpenCL/m16100_a3-pure.cl @@ -153,7 +153,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -171,7 +171,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -190,7 +190,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -328,7 +328,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -346,7 +346,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -365,7 +365,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl index dec32a02e..fb842e566 100644 --- a/OpenCL/m16200-pure.cl +++ b/OpenCL/m16200-pure.cl @@ -394,7 +394,7 @@ KERNEL_FQ void m16200_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl if ((A[0] == 0xa6a6a6a6) && (A[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m16400_a0-pure.cl b/OpenCL/m16400_a0-pure.cl index 410f42885..2bdf16a7f 100644 --- a/OpenCL/m16400_a0-pure.cl +++ b/OpenCL/m16400_a0-pure.cl @@ -118,11 +118,9 @@ DECLSPEC void cram_md5_transform (const u32 *w0, const u32 *w1, const u32 *w2, c DECLSPEC void cram_md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - #endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; diff --git a/OpenCL/m16400_a1-pure.cl b/OpenCL/m16400_a1-pure.cl index 67c1a2ec8..0953452d2 100644 --- a/OpenCL/m16400_a1-pure.cl +++ b/OpenCL/m16400_a1-pure.cl @@ -116,11 +116,9 @@ DECLSPEC void cram_md5_transform (const u32 *w0, const u32 *w1, const u32 *w2, c DECLSPEC void cram_md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - #endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; diff --git a/OpenCL/m16400_a3-optimized.cl b/OpenCL/m16400_a3-optimized.cl index ffb1550c0..2e6cb5041 100644 --- a/OpenCL/m16400_a3-optimized.cl +++ b/OpenCL/m16400_a3-optimized.cl @@ -504,7 +504,7 @@ KERNEL_FQ void m16400_m04 (KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_m08 (KERN_ATTR_VECTOR ()) @@ -542,7 +542,7 @@ KERNEL_FQ void m16400_m08 (KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_m16 (KERN_ATTR_VECTOR ()) @@ -580,7 +580,7 @@ KERNEL_FQ void m16400_m16 (KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s04 (KERN_ATTR_VECTOR ()) @@ -618,7 +618,7 @@ KERNEL_FQ void m16400_s04 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s08 (KERN_ATTR_VECTOR ()) @@ -656,7 +656,7 @@ KERNEL_FQ void m16400_s08 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s16 (KERN_ATTR_VECTOR ()) @@ -694,5 +694,5 @@ KERNEL_FQ void m16400_s16 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16400_a3-pure.cl b/OpenCL/m16400_a3-pure.cl index 59520bbdf..22b6d422c 100644 --- a/OpenCL/m16400_a3-pure.cl +++ b/OpenCL/m16400_a3-pure.cl @@ -116,11 +116,9 @@ DECLSPEC void cram_md5_transform_vector (const u32x *w0, const u32x *w1, const u DECLSPEC void cram_md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - #endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl index 7dd9910ee..7a69a1212 100644 --- a/OpenCL/m16600_a0-optimized.cl +++ b/OpenCL/m16600_a0-optimized.cl @@ -384,7 +384,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -400,7 +400,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -412,7 +412,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -420,7 +420,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -796,7 +796,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -812,7 +812,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -824,7 +824,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -832,7 +832,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl index 21752fd2a..5d4ca64f5 100644 --- a/OpenCL/m16600_a0-pure.cl +++ b/OpenCL/m16600_a0-pure.cl @@ -198,7 +198,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -214,7 +214,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -226,7 +226,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -234,7 +234,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -416,7 +416,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -432,7 +432,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -444,7 +444,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -452,7 +452,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl index df070f6d6..150c6be9f 100644 --- a/OpenCL/m16600_a1-optimized.cl +++ b/OpenCL/m16600_a1-optimized.cl @@ -440,7 +440,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -456,7 +456,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -468,7 +468,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -476,7 +476,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -910,7 +910,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -926,7 +926,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -938,7 +938,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -946,7 +946,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl index 6959f34f5..841535295 100644 --- a/OpenCL/m16600_a1-pure.cl +++ b/OpenCL/m16600_a1-pure.cl @@ -194,7 +194,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -210,7 +210,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -222,7 +222,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -230,7 +230,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -410,7 +410,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -426,7 +426,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -438,7 +438,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -446,7 +446,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl index 6e5902c0b..e2ebdf2e9 100644 --- a/OpenCL/m16600_a3-optimized.cl +++ b/OpenCL/m16600_a3-optimized.cl @@ -161,7 +161,7 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -177,7 +177,7 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -189,7 +189,7 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -197,7 +197,7 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -298,7 +298,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -393,7 +393,7 @@ KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -488,7 +488,7 @@ KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -583,7 +583,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -678,7 +678,7 @@ KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -773,5 +773,5 @@ KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl index b3186aedc..f5a9a515d 100644 --- a/OpenCL/m16600_a3-pure.cl +++ b/OpenCL/m16600_a3-pure.cl @@ -207,7 +207,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -223,7 +223,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -235,7 +235,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -243,7 +243,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -436,7 +436,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -452,7 +452,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -464,7 +464,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -472,7 +472,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m16800-pure.cl b/OpenCL/m16800-pure.cl index 4ba866f7c..54e5a3c9a 100644 --- a/OpenCL/m16800-pure.cl +++ b/OpenCL/m16800-pure.cl @@ -299,7 +299,7 @@ KERNEL_FQ void m16800_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) && (hc_swap32_S (r2) == wpa_pmkid->pmkid[2]) && (hc_swap32_S (r3) == wpa_pmkid->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m16801-pure.cl b/OpenCL/m16801-pure.cl index bb23720b9..d1a382280 100644 --- a/OpenCL/m16801-pure.cl +++ b/OpenCL/m16801-pure.cl @@ -172,7 +172,7 @@ KERNEL_FQ void m16801_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_pmkid_t)) && (hc_swap32_S (r2) == wpa_pmkid->pmkid[2]) && (hc_swap32_S (r3) == wpa_pmkid->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m17300_a3-optimized.cl b/OpenCL/m17300_a3-optimized.cl index a8ff8cae9..0ae7ede66 100644 --- a/OpenCL/m17300_a3-optimized.cl +++ b/OpenCL/m17300_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17300_m04 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17300_m08 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17300_m16 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17300_s04 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17300_s08 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17300_s16 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17400_a3-optimized.cl b/OpenCL/m17400_a3-optimized.cl index e918558ef..989680c60 100644 --- a/OpenCL/m17400_a3-optimized.cl +++ b/OpenCL/m17400_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17400_m04 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17400_m08 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17400_m16 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17400_s04 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17400_s08 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17400_s16 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17500_a3-optimized.cl b/OpenCL/m17500_a3-optimized.cl index 61b82fff4..3b8ffeba2 100644 --- a/OpenCL/m17500_a3-optimized.cl +++ b/OpenCL/m17500_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17500_m04 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17500_m08 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17500_m16 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17500_s04 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17500_s08 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17500_s16 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17600_a3-optimized.cl b/OpenCL/m17600_a3-optimized.cl index 5b48782c7..5897c897e 100644 --- a/OpenCL/m17600_a3-optimized.cl +++ b/OpenCL/m17600_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17600_m04 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17600_m08 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17600_m16 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17600_s04 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17600_s08 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17600_s16 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17700_a3-optimized.cl b/OpenCL/m17700_a3-optimized.cl index 8bac072a7..a0478c740 100644 --- a/OpenCL/m17700_a3-optimized.cl +++ b/OpenCL/m17700_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17700_m04 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17700_m08 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17700_m16 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17700_s04 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17700_s08 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17700_s16 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17800_a3-optimized.cl b/OpenCL/m17800_a3-optimized.cl index cf7b4879d..dbb761026 100644 --- a/OpenCL/m17800_a3-optimized.cl +++ b/OpenCL/m17800_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17800_m04 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17800_m08 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17800_m16 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17800_s04 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17800_s08 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17800_s16 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17900_a3-optimized.cl b/OpenCL/m17900_a3-optimized.cl index acbcc5abc..ad1cbb93d 100644 --- a/OpenCL/m17900_a3-optimized.cl +++ b/OpenCL/m17900_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m17900_m04 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17900_m08 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17900_m16 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17900_s04 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17900_s08 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17900_s16 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18000_a3-optimized.cl b/OpenCL/m18000_a3-optimized.cl index 82f06c7f0..40dc22695 100644 --- a/OpenCL/m18000_a3-optimized.cl +++ b/OpenCL/m18000_a3-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m18000_m04 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m18000_m08 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m18000_m16 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m18000_s04 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m18000_s08 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m18000_s16 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl index e0ecdbc6f..3374f8c23 100644 --- a/OpenCL/m18200_a0-optimized.cl +++ b/OpenCL/m18200_a0-optimized.cl @@ -655,7 +655,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -753,7 +753,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl index 321971402..71f19cbb3 100644 --- a/OpenCL/m18200_a0-pure.cl +++ b/OpenCL/m18200_a0-pure.cl @@ -439,7 +439,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -501,7 +501,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl index 0d1ff44c2..2d77d1e53 100644 --- a/OpenCL/m18200_a1-optimized.cl +++ b/OpenCL/m18200_a1-optimized.cl @@ -702,7 +702,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -849,7 +849,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl index c82591db4..e5df3c2d1 100644 --- a/OpenCL/m18200_a1-pure.cl +++ b/OpenCL/m18200_a1-pure.cl @@ -435,7 +435,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -495,7 +495,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl index db9b24d67..1a32c19de 100644 --- a/OpenCL/m18200_a3-optimized.cl +++ b/OpenCL/m18200_a3-optimized.cl @@ -622,7 +622,7 @@ DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -679,7 +679,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -731,7 +731,7 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m16 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -787,7 +787,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -839,7 +839,7 @@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s16 (KERN_ATTR_ESALT (krb5asrep_t)) diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl index c97a59024..68ddd2c18 100644 --- a/OpenCL/m18200_a3-pure.cl +++ b/OpenCL/m18200_a3-pure.cl @@ -448,7 +448,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -521,7 +521,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m18300-pure.cl b/OpenCL/m18300-pure.cl index 1a349a46f..772b934b5 100644 --- a/OpenCL/m18300-pure.cl +++ b/OpenCL/m18300-pure.cl @@ -434,7 +434,7 @@ KERNEL_FQ void m18300_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl if ((A[0] == 0xa6a6a6a6) && (A[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m18700_a3-optimized.cl b/OpenCL/m18700_a3-optimized.cl index 9c1547e2f..01490025c 100644 --- a/OpenCL/m18700_a3-optimized.cl +++ b/OpenCL/m18700_a3-optimized.cl @@ -181,7 +181,7 @@ KERNEL_FQ void m18700_m04 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_m08 (KERN_ATTR_VECTOR ()) @@ -219,7 +219,7 @@ KERNEL_FQ void m18700_m08 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_m16 (KERN_ATTR_VECTOR ()) @@ -257,7 +257,7 @@ KERNEL_FQ void m18700_m16 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s04 (KERN_ATTR_VECTOR ()) @@ -295,7 +295,7 @@ KERNEL_FQ void m18700_s04 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s08 (KERN_ATTR_VECTOR ()) @@ -333,7 +333,7 @@ KERNEL_FQ void m18700_s08 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s16 (KERN_ATTR_VECTOR ()) @@ -371,5 +371,5 @@ KERNEL_FQ void m18700_s16 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18900-pure.cl b/OpenCL/m18900-pure.cl index ec7d42f88..dc3758ba7 100644 --- a/OpenCL/m18900-pure.cl +++ b/OpenCL/m18900-pure.cl @@ -338,7 +338,7 @@ KERNEL_FQ void m18900_comp (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_ if ((pt[2] == 0x0d0d0d0d) && (pt[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m19200-pure.cl b/OpenCL/m19200-pure.cl index a856109ed..e0119a294 100644 --- a/OpenCL/m19200-pure.cl +++ b/OpenCL/m19200-pure.cl @@ -27,7 +27,7 @@ typedef struct qnx_sha512_tmp DECLSPEC u32 sha512_update_128_qnxbug (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len, u32 sav) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; @@ -313,7 +313,7 @@ DECLSPEC u32 sha512_update_global_swap_qnxbug (sha512_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void sha512_final_qnxbug (sha512_ctx_t *ctx, u32 sav) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); diff --git a/OpenCL/m19600-pure.cl b/OpenCL/m19600-pure.cl index 8604921e0..c8f3d2907 100644 --- a/OpenCL/m19600-pure.cl +++ b/OpenCL/m19600-pure.cl @@ -997,7 +997,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t && sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1] && sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2]) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); diff --git a/OpenCL/m19700-pure.cl b/OpenCL/m19700-pure.cl index 9cc74c490..087a12bbf 100644 --- a/OpenCL/m19700-pure.cl +++ b/OpenCL/m19700-pure.cl @@ -1061,7 +1061,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t && sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1] && sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2]) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); diff --git a/OpenCL/m19800-pure.cl b/OpenCL/m19800-pure.cl index 9959b86b8..080241af6 100644 --- a/OpenCL/m19800-pure.cl +++ b/OpenCL/m19800-pure.cl @@ -609,7 +609,7 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1]) && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2])) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 diff --git a/OpenCL/m19900-pure.cl b/OpenCL/m19900-pure.cl index cf1fdd685..acee3d519 100644 --- a/OpenCL/m19900-pure.cl +++ b/OpenCL/m19900-pure.cl @@ -651,7 +651,7 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1]) && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2])) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl index f4c827431..9eeaade86 100644 --- a/OpenCL/m20011-pure.cl +++ b/OpenCL/m20011-pure.cl @@ -407,7 +407,7 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -415,7 +415,7 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -423,7 +423,7 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl index 5c538b713..ff7879dcc 100644 --- a/OpenCL/m20012-pure.cl +++ b/OpenCL/m20012-pure.cl @@ -407,7 +407,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -415,7 +415,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -423,7 +423,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -453,7 +453,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -461,7 +461,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -469,7 +469,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl index 608b6ff79..3ee6108fc 100644 --- a/OpenCL/m20013-pure.cl +++ b/OpenCL/m20013-pure.cl @@ -407,7 +407,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -415,7 +415,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -423,7 +423,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -453,7 +453,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -461,7 +461,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -469,7 +469,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -499,7 +499,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_serpent_twofish_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } @@ -507,7 +507,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt if (dcrp_verify_header_aes_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } diff --git a/OpenCL/m20710_a0-optimized.cl b/OpenCL/m20710_a0-optimized.cl index 42b2d7702..2370ab876 100644 --- a/OpenCL/m20710_a0-optimized.cl +++ b/OpenCL/m20710_a0-optimized.cl @@ -382,114 +382,6 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_RULES ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -1088,114 +980,6 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_RULES ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; diff --git a/OpenCL/m20710_a1-optimized.cl b/OpenCL/m20710_a1-optimized.cl index 05f6b0908..2b9b6b6fe 100644 --- a/OpenCL/m20710_a1-optimized.cl +++ b/OpenCL/m20710_a1-optimized.cl @@ -438,114 +438,6 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_BASIC ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -1202,114 +1094,6 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_BASIC ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; diff --git a/OpenCL/m20710_a3-optimized.cl b/OpenCL/m20710_a3-optimized.cl index 7a0df4363..facce2424 100644 --- a/OpenCL/m20710_a3-optimized.cl +++ b/OpenCL/m20710_a3-optimized.cl @@ -339,114 +339,6 @@ DECLSPEC void m20710m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -996,114 +888,6 @@ DECLSPEC void m20710s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -1403,7 +1187,7 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_m08 (KERN_ATTR_VECTOR ()) @@ -1464,7 +1248,7 @@ KERNEL_FQ void m20710_m08 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_m16 (KERN_ATTR_VECTOR ()) @@ -1525,7 +1309,7 @@ KERNEL_FQ void m20710_m16 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s04 (KERN_ATTR_VECTOR ()) @@ -1586,7 +1370,7 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s08 (KERN_ATTR_VECTOR ()) @@ -1647,7 +1431,7 @@ KERNEL_FQ void m20710_s08 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s16 (KERN_ATTR_VECTOR ()) @@ -1708,5 +1492,5 @@ KERNEL_FQ void m20710_s16 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m20800_a3-optimized.cl b/OpenCL/m20800_a3-optimized.cl index 66c340f4e..a879c64c8 100644 --- a/OpenCL/m20800_a3-optimized.cl +++ b/OpenCL/m20800_a3-optimized.cl @@ -613,7 +613,7 @@ KERNEL_FQ void m20800_m04 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ()) @@ -683,7 +683,7 @@ KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ()) @@ -753,7 +753,7 @@ KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ()) @@ -823,7 +823,7 @@ KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ()) @@ -893,7 +893,7 @@ KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ()) @@ -963,5 +963,5 @@ KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m20900_a3-optimized.cl b/OpenCL/m20900_a3-optimized.cl index e563816f6..28b6206ba 100644 --- a/OpenCL/m20900_a3-optimized.cl +++ b/OpenCL/m20900_a3-optimized.cl @@ -1431,7 +1431,7 @@ KERNEL_FQ void m20900_m04 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_m08 (KERN_ATTR_BASIC ()) @@ -1501,7 +1501,7 @@ KERNEL_FQ void m20900_m08 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_m16 (KERN_ATTR_BASIC ()) @@ -1571,7 +1571,7 @@ KERNEL_FQ void m20900_m16 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s04 (KERN_ATTR_BASIC ()) @@ -1641,7 +1641,7 @@ KERNEL_FQ void m20900_s04 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s08 (KERN_ATTR_BASIC ()) @@ -1711,7 +1711,7 @@ KERNEL_FQ void m20900_s08 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s16 (KERN_ATTR_BASIC ()) @@ -1781,5 +1781,5 @@ KERNEL_FQ void m20900_s16 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m21000_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl index 7ba7e964f..a0c23197e 100644 --- a/OpenCL/m21000_a3-optimized.cl +++ b/OpenCL/m21000_a3-optimized.cl @@ -451,7 +451,7 @@ KERNEL_FQ void m21000_m04 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ()) @@ -489,7 +489,7 @@ KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ()) @@ -527,7 +527,7 @@ KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ()) @@ -565,7 +565,7 @@ KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ()) @@ -603,7 +603,7 @@ KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ()) @@ -641,5 +641,5 @@ KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m21100_a3-optimized.cl b/OpenCL/m21100_a3-optimized.cl index 2a2e3dd1c..4dacc7376 100644 --- a/OpenCL/m21100_a3-optimized.cl +++ b/OpenCL/m21100_a3-optimized.cl @@ -707,7 +707,7 @@ KERNEL_FQ void m21100_m04 (KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_m08 (KERN_ATTR_BASIC ()) @@ -777,7 +777,7 @@ KERNEL_FQ void m21100_m08 (KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_m16 (KERN_ATTR_BASIC ()) @@ -847,7 +847,7 @@ KERNEL_FQ void m21100_m16 (KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) @@ -917,7 +917,7 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s08 (KERN_ATTR_BASIC ()) @@ -987,7 +987,7 @@ KERNEL_FQ void m21100_s08 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s16 (KERN_ATTR_BASIC ()) @@ -1057,5 +1057,5 @@ KERNEL_FQ void m21100_s16 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m21100_a3-pure.cl b/OpenCL/m21100_a3-pure.cl index 4f5d7cd67..480d7eb4e 100644 --- a/OpenCL/m21100_a3-pure.cl +++ b/OpenCL/m21100_a3-pure.cl @@ -71,7 +71,7 @@ KERNEL_FQ void m21100_mxx (KERN_ATTR_VECTOR ()) const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32 s[64] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { @@ -211,7 +211,7 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_VECTOR ()) const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32 s[64] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { diff --git a/OpenCL/m21200_a3-optimized.cl b/OpenCL/m21200_a3-optimized.cl index cd4545efc..06490371d 100644 --- a/OpenCL/m21200_a3-optimized.cl +++ b/OpenCL/m21200_a3-optimized.cl @@ -854,7 +854,7 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_m08 (KERN_ATTR_BASIC ()) @@ -924,7 +924,7 @@ KERNEL_FQ void m21200_m08 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_m16 (KERN_ATTR_BASIC ()) @@ -994,7 +994,7 @@ KERNEL_FQ void m21200_m16 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) @@ -1064,7 +1064,7 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s08 (KERN_ATTR_BASIC ()) @@ -1134,7 +1134,7 @@ KERNEL_FQ void m21200_s08 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s16 (KERN_ATTR_BASIC ()) @@ -1204,5 +1204,5 @@ KERNEL_FQ void m21200_s16 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m21400_a3-optimized.cl b/OpenCL/m21400_a3-optimized.cl index 65f0b3ff6..fe61dac8f 100644 --- a/OpenCL/m21400_a3-optimized.cl +++ b/OpenCL/m21400_a3-optimized.cl @@ -534,7 +534,7 @@ KERNEL_FQ void m21400_m04 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_m08 (KERN_ATTR_VECTOR ()) @@ -572,7 +572,7 @@ KERNEL_FQ void m21400_m08 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_m16 (KERN_ATTR_VECTOR ()) @@ -610,7 +610,7 @@ KERNEL_FQ void m21400_m16 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s04 (KERN_ATTR_VECTOR ()) @@ -648,7 +648,7 @@ KERNEL_FQ void m21400_s04 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s08 (KERN_ATTR_VECTOR ()) @@ -686,7 +686,7 @@ KERNEL_FQ void m21400_s08 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s16 (KERN_ATTR_VECTOR ()) @@ -724,5 +724,5 @@ KERNEL_FQ void m21400_s16 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl index afa33ed42..6de91529b 100644 --- a/OpenCL/m21800-pure.cl +++ b/OpenCL/m21800-pure.cl @@ -605,7 +605,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) ((tmp[0] == 0x7b) && (tmp[1] == 0x0d) && (tmp[2] == 0x0a) && (tmp[3] == 0x20) && (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x20) && (tmp[7] == 0x22))) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m22000-pure.cl b/OpenCL/m22000-pure.cl index 2e2a87f61..8cc624a9b 100644 --- a/OpenCL/m22000-pure.cl +++ b/OpenCL/m22000-pure.cl @@ -488,7 +488,7 @@ KERNEL_FQ void m22000_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -668,7 +668,7 @@ KERNEL_FQ void m22000_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -963,7 +963,7 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (keymic[2] == wpa->keymic[2]) && (keymic[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -1028,7 +1028,7 @@ KERNEL_FQ void m22000_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (hc_swap32_S (r2) == wpa->pmkid[2]) && (hc_swap32_S (r3) == wpa->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m22001-pure.cl b/OpenCL/m22001-pure.cl index c41533432..95e0e0395 100644 --- a/OpenCL/m22001-pure.cl +++ b/OpenCL/m22001-pure.cl @@ -395,7 +395,7 @@ KERNEL_FQ void m22001_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -575,7 +575,7 @@ KERNEL_FQ void m22001_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -870,7 +870,7 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (keymic[2] == wpa->keymic[2]) && (keymic[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } @@ -935,7 +935,7 @@ KERNEL_FQ void m22001_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (hc_swap32_S (r2) == wpa->pmkid[2]) && (hc_swap32_S (r3) == wpa->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } diff --git a/OpenCL/m22100-pure.cl b/OpenCL/m22100-pure.cl index 9961c10f7..26bbd9505 100644 --- a/OpenCL/m22100-pure.cl +++ b/OpenCL/m22100-pure.cl @@ -470,7 +470,7 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) if (type == 0) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } @@ -569,7 +569,7 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) // if we end up here, we are sure to have found the correct password: - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m22200_a3-optimized.cl b/OpenCL/m22200_a3-optimized.cl index 1720702af..a30697b83 100644 --- a/OpenCL/m22200_a3-optimized.cl +++ b/OpenCL/m22200_a3-optimized.cl @@ -349,7 +349,7 @@ KERNEL_FQ void m22200_m04 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_m08 (KERN_ATTR_BASIC ()) @@ -421,7 +421,7 @@ KERNEL_FQ void m22200_m08 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_m16 (KERN_ATTR_BASIC ()) @@ -508,7 +508,7 @@ KERNEL_FQ void m22200_m16 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) @@ -572,7 +572,7 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s08 (KERN_ATTR_BASIC ()) @@ -644,7 +644,7 @@ KERNEL_FQ void m22200_s08 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s16 (KERN_ATTR_BASIC ()) @@ -731,5 +731,5 @@ KERNEL_FQ void m22200_s16 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m22300_a3-optimized.cl b/OpenCL/m22300_a3-optimized.cl index d10ff302d..04f4d9af5 100644 --- a/OpenCL/m22300_a3-optimized.cl +++ b/OpenCL/m22300_a3-optimized.cl @@ -587,7 +587,7 @@ KERNEL_FQ void m22300_m04 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_m08 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m22300_m08 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_m16 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m22300_m16 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) @@ -728,7 +728,7 @@ KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s08 (KERN_ATTR_BASIC ()) @@ -775,7 +775,7 @@ KERNEL_FQ void m22300_s08 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s16 (KERN_ATTR_BASIC ()) @@ -822,5 +822,5 @@ KERNEL_FQ void m22300_s16 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m22400-pure.cl b/OpenCL/m22400-pure.cl index 8045986c5..4df18501a 100644 --- a/OpenCL/m22400-pure.cl +++ b/OpenCL/m22400-pure.cl @@ -26,7 +26,7 @@ typedef struct aescrypt typedef struct aescrypt_tmp { - u32 pass[144]; + u32 pass[80]; int len; } aescrypt_tmp_t; @@ -50,36 +50,13 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) s[2] = salt_bufs[SALT_POS].salt_buf[2]; s[3] = salt_bufs[SALT_POS].salt_buf[3]; - // convert password to utf16le: - const u32 pw_len = pws[gid].pw_len; - const u32 pw_len_utf16le = pw_len * 2; - - u32 w[144] = { 0 }; + u32 w[80] = { 0 }; - for (u32 i = 0, j = 0; i < 64; i += 4, j += 8) + for (u32 i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 out1[4]; - - make_utf16le_S (in, out0, out1); - - w[j + 0] = hc_swap32_S (out0[0]); - w[j + 1] = hc_swap32_S (out0[1]); - w[j + 2] = hc_swap32_S (out0[2]); - w[j + 3] = hc_swap32_S (out0[3]); - w[j + 4] = hc_swap32_S (out1[0]); - w[j + 5] = hc_swap32_S (out1[1]); - w[j + 6] = hc_swap32_S (out1[2]); - w[j + 7] = hc_swap32_S (out1[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // sha256: @@ -88,7 +65,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) sha256_init (&ctx); sha256_update (&ctx, s, 32); - sha256_update (&ctx, w, pw_len_utf16le); + sha256_update (&ctx, w, pw_len); sha256_final (&ctx); // set tmps: @@ -110,7 +87,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) w[6] = ctx.h[6]; w[7] = ctx.h[7]; - const u32 final_len = 32 + pw_len_utf16le; + const u32 final_len = 32 + pw_len; const u32 idx_floor = (final_len / 64) * 16; const u32 idx_ceil = ((final_len & 63) >= 56) ? idx_floor + 16 : idx_floor; @@ -123,7 +100,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 144; i++) + for (u32 i = 0; i < 80; i++) { tmps[gid].pass[i] = w[i]; } @@ -139,17 +116,17 @@ KERNEL_FQ void m22400_loop (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) // init - u32 w[144]; + u32 w[80]; #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 144; i++) + for (u32 i = 0; i < 80; i++) { w[i] = tmps[gid].pass[i]; } - const int pw_len = tmps[gid].len; + const int len = tmps[gid].len; // main loop @@ -174,7 +151,7 @@ KERNEL_FQ void m22400_loop (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) int left; int idx; - for (left = pw_len, idx = 0; left >= 56; left -= 64, idx += 16) + for (left = len, idx = 0; left >= 56; left -= 64, idx += 16) { w0[0] = w[idx + 0]; w0[1] = w[idx + 1]; diff --git a/OpenCL/m22500_a0-optimized.cl b/OpenCL/m22500_a0-optimized.cl index 866229b36..863aca8fd 100644 --- a/OpenCL/m22500_a0-optimized.cl +++ b/OpenCL/m22500_a0-optimized.cl @@ -609,7 +609,7 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -1204,7 +1204,7 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m22500_a0-pure.cl b/OpenCL/m22500_a0-pure.cl index d59915fd8..712f7a8e3 100644 --- a/OpenCL/m22500_a0-pure.cl +++ b/OpenCL/m22500_a0-pure.cl @@ -318,7 +318,7 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -614,7 +614,7 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m22500_a1-optimized.cl b/OpenCL/m22500_a1-optimized.cl index 3f5b4b672..b4b80502f 100644 --- a/OpenCL/m22500_a1-optimized.cl +++ b/OpenCL/m22500_a1-optimized.cl @@ -668,7 +668,7 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -1323,7 +1323,7 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m22500_a1-pure.cl b/OpenCL/m22500_a1-pure.cl index b2442e889..1f2257f45 100644 --- a/OpenCL/m22500_a1-pure.cl +++ b/OpenCL/m22500_a1-pure.cl @@ -322,7 +322,7 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -624,7 +624,7 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m22500_a3-optimized.cl b/OpenCL/m22500_a3-optimized.cl index 4855b6a55..0f8372425 100644 --- a/OpenCL/m22500_a3-optimized.cl +++ b/OpenCL/m22500_a3-optimized.cl @@ -597,7 +597,7 @@ DECLSPEC void m22500 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -696,7 +696,7 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_m08 (KERN_ATTR_VECTOR ()) @@ -791,7 +791,7 @@ KERNEL_FQ void m22500_m08 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_m16 (KERN_ATTR_VECTOR ()) @@ -886,7 +886,7 @@ KERNEL_FQ void m22500_m16 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s04 (KERN_ATTR_VECTOR ()) @@ -981,7 +981,7 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s08 (KERN_ATTR_VECTOR ()) @@ -1076,7 +1076,7 @@ KERNEL_FQ void m22500_s08 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s16 (KERN_ATTR_VECTOR ()) @@ -1171,5 +1171,5 @@ KERNEL_FQ void m22500_s16 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m22500_a3-pure.cl b/OpenCL/m22500_a3-pure.cl index 0e182ad48..bb34fa068 100644 --- a/OpenCL/m22500_a3-pure.cl +++ b/OpenCL/m22500_a3-pure.cl @@ -331,7 +331,7 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_VECTOR ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -642,7 +642,7 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_VECTOR ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m22600-pure.cl b/OpenCL/m22600-pure.cl index 972b8224a..8353f52e8 100644 --- a/OpenCL/m22600-pure.cl +++ b/OpenCL/m22600-pure.cl @@ -521,7 +521,7 @@ KERNEL_FQ void m22600_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) r2 == message_key[2] && r3 == message_key[3]) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 985a249da..033d2b0b7 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -150,28 +150,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -180,56 +160,72 @@ DECLSPEC void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; + for (int i = 0; i < STATE_CNT4; i += 4) + { + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; + + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; + + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; + + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; + + TI[i + 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; + } + + #if SCRYPT_R > 1 - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; + } - for (int i = 0; i < SCRYPT_R; i++) + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; - - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; - - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) { - TI[i] = TO[i]; + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; } + + #endif } -DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -248,37 +244,37 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} - for (u32 i = 0; i < SCRYPT_N; i++) +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); + + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; + + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -286,6 +282,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -294,29 +292,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r (X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #ifdef IS_CUDA - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -329,41 +304,24 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) if (gid >= gid_max) return; - // convert password to utf16be: + u32 w[128] = { 0 }; - const u32 pw_len = pws[gid].pw_len; + hc_enc_t hc_enc; - const u32 pw_len_utf16be = pw_len * 2; + hc_enc_init (&hc_enc); - u32 w[128] = { 0 }; + const u32 w_len = hc_enc_next_global (&hc_enc, pws[gid].i, pws[gid].pw_len, 256, w, sizeof (w)); - for (u32 i = 0, j = 0; i < 64; i += 4, j += 8) + // utf16le to utf16be + for (int i = 0, j = 0; i < w_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 out1[4]; - - make_utf16be_S (in, out0, out1); - - w[j + 0] = out0[0]; - w[j + 1] = out0[1]; - w[j + 2] = out0[2]; - w[j + 3] = out0[3]; - w[j + 4] = out1[0]; - w[j + 5] = out1[1]; - w[j + 6] = out1[2]; - w[j + 7] = out1[3]; + w[j] = ((w[j] >> 8) & 0x00ff00ff) + | ((w[j] << 8) & 0xff00ff00); } sha256_hmac_ctx_t sha256_hmac_ctx; - sha256_hmac_init_swap (&sha256_hmac_ctx, w, pw_len_utf16be); + sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len); u32 s0[4] = { 0 }; u32 s1[4] = { 0 }; @@ -427,44 +385,96 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #ifdef IS_CUDA + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } } -KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) +KERNEL_FQ void m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { + /** + * base + */ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; + // SCRYPT part, init V + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; +} - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif +KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -530,41 +540,24 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) * 2nd pbkdf2, creates B */ - // convert password to utf16be: + u32 w[128] = { 0 }; - const u32 pw_len = pws[gid].pw_len; + hc_enc_t hc_enc; - const u32 pw_len_utf16be = pw_len * 2; + hc_enc_init (&hc_enc); - u32 w[128] = { 0 }; + const u32 w_len = hc_enc_next_global (&hc_enc, pws[gid].i, pws[gid].pw_len, 256, w, sizeof (w)); - for (u32 i = 0, j = 0; i < 64; i += 4, j += 8) + // utf16le to utf16be + for (int i = 0, j = 0; i < w_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 out1[4]; - - make_utf16be_S (in, out0, out1); - - w[j + 0] = out0[0]; - w[j + 1] = out0[1]; - w[j + 2] = out0[2]; - w[j + 3] = out0[3]; - w[j + 4] = out1[0]; - w[j + 5] = out1[1]; - w[j + 6] = out1[2]; - w[j + 7] = out1[3]; + w[j] = ((w[j] >> 8) & 0x00ff00ff) + | ((w[j] << 8) & 0xff00ff00); } sha256_hmac_ctx_t ctx; - sha256_hmac_init_swap (&ctx, w, pw_len_utf16be); + sha256_hmac_init_swap (&ctx, w, w_len); u32 w0[4]; u32 w1[4]; @@ -573,35 +566,48 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; - - tmp = tmps[gid].P[l + 0]; + uint4 X[4]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - tmp = tmps[gid].P[l + 1]; + uint4 T[4]; - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; - - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; + #ifdef IS_CUDA + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -671,7 +677,7 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) if (is_valid_bitcoinj (dec) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } @@ -695,7 +701,7 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) if (is_valid_bitcoinj (dec) == 1) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m23001_a0-optimized.cl b/OpenCL/m23001_a0-optimized.cl index 800521e0c..aa1f17320 100644 --- a/OpenCL/m23001_a0-optimized.cl +++ b/OpenCL/m23001_a0-optimized.cl @@ -344,7 +344,7 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -678,7 +678,7 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23001_a0-pure.cl b/OpenCL/m23001_a0-pure.cl index 8bf4f6855..766382acb 100644 --- a/OpenCL/m23001_a0-pure.cl +++ b/OpenCL/m23001_a0-pure.cl @@ -210,7 +210,7 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -402,7 +402,7 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23001_a1-optimized.cl b/OpenCL/m23001_a1-optimized.cl index db7dce410..4c0a976ad 100644 --- a/OpenCL/m23001_a1-optimized.cl +++ b/OpenCL/m23001_a1-optimized.cl @@ -400,7 +400,7 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -792,7 +792,7 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23001_a1-pure.cl b/OpenCL/m23001_a1-pure.cl index cd0762c9d..1c1ec65ab 100644 --- a/OpenCL/m23001_a1-pure.cl +++ b/OpenCL/m23001_a1-pure.cl @@ -206,7 +206,7 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -396,7 +396,7 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23001_a3-optimized.cl b/OpenCL/m23001_a3-optimized.cl index 27de74343..bd2ab1861 100644 --- a/OpenCL/m23001_a3-optimized.cl +++ b/OpenCL/m23001_a3-optimized.cl @@ -370,7 +370,7 @@ DECLSPEC void m23001m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -724,7 +724,7 @@ DECLSPEC void m23001s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -820,7 +820,7 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -911,7 +911,7 @@ KERNEL_FQ void m23001_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m23001_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1093,7 +1093,7 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1184,7 +1184,7 @@ KERNEL_FQ void m23001_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1275,5 +1275,5 @@ KERNEL_FQ void m23001_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23001_a3-pure.cl b/OpenCL/m23001_a3-pure.cl index 0ac32cc4a..0f77b0740 100644 --- a/OpenCL/m23001_a3-pure.cl +++ b/OpenCL/m23001_a3-pure.cl @@ -219,7 +219,7 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -422,7 +422,7 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23002_a0-optimized.cl b/OpenCL/m23002_a0-optimized.cl index 5c45093ab..c37aad9e6 100644 --- a/OpenCL/m23002_a0-optimized.cl +++ b/OpenCL/m23002_a0-optimized.cl @@ -397,7 +397,7 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -784,7 +784,7 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23002_a0-pure.cl b/OpenCL/m23002_a0-pure.cl index 278dfe8e7..a1e43a58b 100644 --- a/OpenCL/m23002_a0-pure.cl +++ b/OpenCL/m23002_a0-pure.cl @@ -263,7 +263,7 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -508,7 +508,7 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23002_a1-optimized.cl b/OpenCL/m23002_a1-optimized.cl index 92809b922..d3820d763 100644 --- a/OpenCL/m23002_a1-optimized.cl +++ b/OpenCL/m23002_a1-optimized.cl @@ -453,7 +453,7 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -898,7 +898,7 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23002_a1-pure.cl b/OpenCL/m23002_a1-pure.cl index 97804af53..cfeb1f8e2 100644 --- a/OpenCL/m23002_a1-pure.cl +++ b/OpenCL/m23002_a1-pure.cl @@ -259,7 +259,7 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -502,7 +502,7 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23002_a3-optimized.cl b/OpenCL/m23002_a3-optimized.cl index 4a4ee3dd6..3bc4e1f13 100644 --- a/OpenCL/m23002_a3-optimized.cl +++ b/OpenCL/m23002_a3-optimized.cl @@ -423,7 +423,7 @@ DECLSPEC void m23002m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -830,7 +830,7 @@ DECLSPEC void m23002s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -926,7 +926,7 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1017,7 +1017,7 @@ KERNEL_FQ void m23002_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1108,7 +1108,7 @@ KERNEL_FQ void m23002_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1199,7 +1199,7 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1290,7 +1290,7 @@ KERNEL_FQ void m23002_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1381,5 +1381,5 @@ KERNEL_FQ void m23002_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23002_a3-pure.cl b/OpenCL/m23002_a3-pure.cl index ef1968170..f139efe46 100644 --- a/OpenCL/m23002_a3-pure.cl +++ b/OpenCL/m23002_a3-pure.cl @@ -272,7 +272,7 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -528,7 +528,7 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23003_a0-optimized.cl b/OpenCL/m23003_a0-optimized.cl index 36956f8cf..ccdcef901 100644 --- a/OpenCL/m23003_a0-optimized.cl +++ b/OpenCL/m23003_a0-optimized.cl @@ -399,7 +399,7 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -788,7 +788,7 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23003_a0-pure.cl b/OpenCL/m23003_a0-pure.cl index 1dcf9cca5..22b844159 100644 --- a/OpenCL/m23003_a0-pure.cl +++ b/OpenCL/m23003_a0-pure.cl @@ -265,7 +265,7 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -512,7 +512,7 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23003_a1-optimized.cl b/OpenCL/m23003_a1-optimized.cl index 341d4f476..d972fc711 100644 --- a/OpenCL/m23003_a1-optimized.cl +++ b/OpenCL/m23003_a1-optimized.cl @@ -455,7 +455,7 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -902,7 +902,7 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23003_a1-pure.cl b/OpenCL/m23003_a1-pure.cl index 138d80829..ef5680864 100644 --- a/OpenCL/m23003_a1-pure.cl +++ b/OpenCL/m23003_a1-pure.cl @@ -261,7 +261,7 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -506,7 +506,7 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23003_a3-optimized.cl b/OpenCL/m23003_a3-optimized.cl index bc5361ad7..d94e79c8c 100644 --- a/OpenCL/m23003_a3-optimized.cl +++ b/OpenCL/m23003_a3-optimized.cl @@ -425,7 +425,7 @@ DECLSPEC void m23003m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -834,7 +834,7 @@ DECLSPEC void m23003s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -930,7 +930,7 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1021,7 +1021,7 @@ KERNEL_FQ void m23003_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1112,7 +1112,7 @@ KERNEL_FQ void m23003_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1203,7 +1203,7 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1294,7 +1294,7 @@ KERNEL_FQ void m23003_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1385,5 +1385,5 @@ KERNEL_FQ void m23003_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23003_a3-pure.cl b/OpenCL/m23003_a3-pure.cl index c4415fdd1..530bd5f9e 100644 --- a/OpenCL/m23003_a3-pure.cl +++ b/OpenCL/m23003_a3-pure.cl @@ -274,7 +274,7 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } @@ -532,7 +532,7 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } diff --git a/OpenCL/m23100-pure.cl b/OpenCL/m23100-pure.cl index ed0957e23..09918dc77 100644 --- a/OpenCL/m23100-pure.cl +++ b/OpenCL/m23100-pure.cl @@ -325,7 +325,7 @@ KERNEL_FQ void m23100_comp (KERN_ATTR_TMPS_ESALT (keychain_tmp_t, keychain_t)) if ((out[1] ^ iv[1]) == 0x04040404) // this check uses very low number of bits => collisions { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m23300-pure.cl b/OpenCL/m23300-pure.cl index b73be81db..984c477be 100644 --- a/OpenCL/m23300-pure.cl +++ b/OpenCL/m23300-pure.cl @@ -400,7 +400,7 @@ KERNEL_FQ void m23300_comp (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) (res[10] == checksum[2]) && (res[11] == checksum[3])) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m23500-pure.cl b/OpenCL/m23500-pure.cl index cb28b7f0d..e18268d61 100644 --- a/OpenCL/m23500-pure.cl +++ b/OpenCL/m23500-pure.cl @@ -511,7 +511,7 @@ KERNEL_FQ void m23500_comp (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) if ((tmps[gid].data[0] == 0xa6a6a6a6) && (tmps[gid].data[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m23600-pure.cl b/OpenCL/m23600-pure.cl index 3c1a169c0..b14c955d0 100644 --- a/OpenCL/m23600-pure.cl +++ b/OpenCL/m23600-pure.cl @@ -527,7 +527,7 @@ KERNEL_FQ void m23600_comp (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) if ((tmps[gid].data[0] == 0xa6a6a6a6) && (tmps[gid].data[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m23700-optimized.cl b/OpenCL/m23700-optimized.cl index 3968033da..f496a32e2 100644 --- a/OpenCL/m23700-optimized.cl +++ b/OpenCL/m23700-optimized.cl @@ -161,15 +161,20 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) if (gid >= gid_max) return; - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); u32 salt_buf[2]; @@ -184,7 +189,7 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) u32 p = 0; - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (cb, p, GETCHAR (pw_buf, j)); } @@ -194,8 +199,8 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) PUTCHAR_BE (cb, p, GETCHAR (salt_buf, j)); } - const u32 p2 = (pw_len * 2) + salt_len; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p2 = pw_len + salt_len; + const u32 p3 = pw_len + salt_len + 3; const u32 init_pos = loop_pos / (ROUNDS / 16); @@ -491,15 +496,20 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) if (gid >= gid_max) return; - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); u32 salt_buf[2]; @@ -518,7 +528,7 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) for (u32 i = 0, p = 0; i < 64; i++) { - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); } @@ -533,9 +543,9 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) p += 3; } - const u32 p2 = (pw_len * 2) + salt_len; + const u32 p2 = pw_len + salt_len; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; const u32 init_pos = loop_pos / (ROUNDS / 16); @@ -589,23 +599,15 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) u32 tmp0 = 0; u32 tmp1 = 0; - switch (k & 3) - { - case 0: tmp0 = iter_s >> 0; mask0 = 0x0000ffff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 1: tmp0 = iter_s >> 8; mask0 = 0xff0000ff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 2: tmp0 = iter_s >> 16; mask0 = 0xffff0000; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 3: tmp0 = iter_s >> 24; mask0 = 0xffffff00; - tmp1 = iter_s << 8; mask1 = 0x00ffffff; - break; - } + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } - switch (k / 4) + switch (kd) { case 0: w[ 0] = (w[ 0] & mask0) | tmp0; w[ 1] = (w[ 1] & mask1) | tmp1; @@ -747,11 +749,11 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) * base */ - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); const u32 salt_len = 8; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; u32 w0[4]; u32 w1[4]; @@ -805,13 +807,18 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) for (int i = 0; i < 16; i++) { - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; //const u32 pw_len = pws[gid].pw_len; @@ -822,7 +829,7 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) //const u32 salt_len = 8; - //const u32 p3 = (pw_len * 2) + salt_len + 3; + //const u32 p3 = pw_len + salt_len + 3; u32 w[16]; @@ -845,7 +852,7 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) u32 p = 0; - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); } diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index 3675e4515..ec62394a1 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -161,54 +161,22 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co switch (div) { - case 0: w0[0] |= tmp0; - w0[1] = tmp1; - break; - case 1: w0[1] |= tmp0; - w0[2] = tmp1; - break; - case 2: w0[2] |= tmp0; - w0[3] = tmp1; - break; - case 3: w0[3] |= tmp0; - w1[0] = tmp1; - break; - case 4: w1[0] |= tmp0; - w1[1] = tmp1; - break; - case 5: w1[1] |= tmp0; - w1[2] = tmp1; - break; - case 6: w1[2] |= tmp0; - w1[3] = tmp1; - break; - case 7: w1[3] |= tmp0; - w2[0] = tmp1; - break; - case 8: w2[0] |= tmp0; - w2[1] = tmp1; - break; - case 9: w2[1] |= tmp0; - w2[2] = tmp1; - break; - case 10: w2[2] |= tmp0; - w2[3] = tmp1; - break; - case 11: w2[3] |= tmp0; - w3[0] = tmp1; - break; - case 12: w3[0] |= tmp0; - w3[1] = tmp1; - break; - case 13: w3[1] |= tmp0; - w3[2] = tmp1; - break; - case 14: w3[2] |= tmp0; - w3[3] = tmp1; - break; - case 15: w3[3] |= tmp0; - carry = tmp1; - break; + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] |= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: } const u32 new_len = func_len + 3; @@ -590,7 +558,9 @@ DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (bytes == 0) return; + + const int pos = ctx->len & 63; int len = 64; @@ -723,7 +693,9 @@ DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) u32 w2[4]; u32 w3[4]; - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; int pos1 = 0; int pos4 = 0; @@ -877,38 +849,17 @@ KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) const u32 pw_len = pws[gid].pw_len; - // first set the utf16le pass: - u32 w[80] = { 0 }; - for (u32 i = 0, j = 0, k = 0; i < pw_len; i += 16, j += 4, k += 8) + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 a[4]; - - a[0] = pws[gid].i[j + 0]; - a[1] = pws[gid].i[j + 1]; - a[2] = pws[gid].i[j + 2]; - a[3] = pws[gid].i[j + 3]; - - u32 b[4]; - u32 c[4]; - - make_utf16le (a, b, c); - - w[k + 0] = hc_swap32_S (b[0]); - w[k + 1] = hc_swap32_S (b[1]); - w[k + 2] = hc_swap32_S (b[2]); - w[k + 3] = hc_swap32_S (b[3]); - w[k + 4] = hc_swap32_S (c[0]); - w[k + 5] = hc_swap32_S (c[1]); - w[k + 6] = hc_swap32_S (c[2]); - w[k + 7] = hc_swap32_S (c[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // append salt: - const u32 salt_idx = (pw_len * 2) / 4; - const u32 salt_off = (pw_len * 2) & 3; + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; u32 salt_buf[3]; @@ -925,10 +876,9 @@ KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) salt_buf[0] = (salt_buf[0] >> 16); } - w[salt_idx] |= salt_buf[0]; - - w[salt_idx + 1] = salt_buf[1]; - w[salt_idx + 2] = salt_buf[2]; + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; // store initial w[] (pass and salt) in tmps: @@ -955,17 +905,17 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; - u32 w[80] = { 0 }; // 64 byte aligned + u32 w[80] = { 0 }; - for (u32 i = 0; i < 66; i++) // unroll ? + for (u32 i = 0; i < 66; i++) { w[i] = tmps[gid].w[i]; } @@ -1129,11 +1079,11 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; diff --git a/OpenCL/m23800-optimized.cl b/OpenCL/m23800-optimized.cl index fdaf41875..a26769e7b 100644 --- a/OpenCL/m23800-optimized.cl +++ b/OpenCL/m23800-optimized.cl @@ -71,15 +71,20 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, if (gid >= gid_max) return; - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); u32 salt_buf[2]; @@ -98,7 +103,7 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, for (u32 i = 0, p = 0; i < 64; i++) { - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); } @@ -113,9 +118,9 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, p += 3; } - const u32 p2 = (pw_len * 2) + salt_len; + const u32 p2 = pw_len + salt_len; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; const u32 init_pos = loop_pos / (ROUNDS / 16); @@ -169,23 +174,15 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, u32 tmp0 = 0; u32 tmp1 = 0; - switch (k & 3) - { - case 0: tmp0 = iter_s >> 0; mask0 = 0x0000ffff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 1: tmp0 = iter_s >> 8; mask0 = 0xff0000ff; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 2: tmp0 = iter_s >> 16; mask0 = 0xffff0000; - tmp1 = 0; mask1 = 0xffffffff; - break; - case 3: tmp0 = iter_s >> 24; mask0 = 0xffffff00; - tmp1 = iter_s << 8; mask1 = 0x00ffffff; - break; - } + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } - switch (k / 4) + switch (kd) { case 0: w[ 0] = (w[ 0] & mask0) | tmp0; w[ 1] = (w[ 1] & mask1) | tmp1; @@ -320,11 +317,11 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_ * base */ - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); const u32 salt_len = 8; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; u32 w0[4]; u32 w1[4]; @@ -367,13 +364,18 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_ for (int i = 0; i < 16; i++) { - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; //const u32 pw_len = pws[gid].pw_len; @@ -384,7 +386,7 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_ //const u32 salt_len = 8; - //const u32 p3 = (pw_len * 2) + salt_len + 3; + //const u32 p3 = pw_len + salt_len + 3; u32 w[16]; @@ -407,7 +409,7 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_ u32 p = 0; - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); } diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index 70a28fe36..1629433c0 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -72,54 +72,22 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co switch (div) { - case 0: w0[0] |= tmp0; - w0[1] = tmp1; - break; - case 1: w0[1] |= tmp0; - w0[2] = tmp1; - break; - case 2: w0[2] |= tmp0; - w0[3] = tmp1; - break; - case 3: w0[3] |= tmp0; - w1[0] = tmp1; - break; - case 4: w1[0] |= tmp0; - w1[1] = tmp1; - break; - case 5: w1[1] |= tmp0; - w1[2] = tmp1; - break; - case 6: w1[2] |= tmp0; - w1[3] = tmp1; - break; - case 7: w1[3] |= tmp0; - w2[0] = tmp1; - break; - case 8: w2[0] |= tmp0; - w2[1] = tmp1; - break; - case 9: w2[1] |= tmp0; - w2[2] = tmp1; - break; - case 10: w2[2] |= tmp0; - w2[3] = tmp1; - break; - case 11: w2[3] |= tmp0; - w3[0] = tmp1; - break; - case 12: w3[0] |= tmp0; - w3[1] = tmp1; - break; - case 13: w3[1] |= tmp0; - w3[2] = tmp1; - break; - case 14: w3[2] |= tmp0; - w3[3] = tmp1; - break; - case 15: w3[3] |= tmp0; - carry = tmp1; - break; + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] |= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: } const u32 new_len = func_len + 3; @@ -501,7 +469,9 @@ DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (bytes == 0) return; + + const int pos = ctx->len & 63; int len = 64; @@ -634,7 +604,9 @@ DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) u32 w2[4]; u32 w3[4]; - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; int pos1 = 0; int pos4 = 0; @@ -788,38 +760,17 @@ KERNEL_FQ void m23800_init (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, const u32 pw_len = pws[gid].pw_len; - // first set the utf16le pass: - u32 w[80] = { 0 }; - for (u32 i = 0, j = 0, k = 0; i < pw_len; i += 16, j += 4, k += 8) + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 a[4]; - - a[0] = pws[gid].i[j + 0]; - a[1] = pws[gid].i[j + 1]; - a[2] = pws[gid].i[j + 2]; - a[3] = pws[gid].i[j + 3]; - - u32 b[4]; - u32 c[4]; - - make_utf16le (a, b, c); - - w[k + 0] = hc_swap32_S (b[0]); - w[k + 1] = hc_swap32_S (b[1]); - w[k + 2] = hc_swap32_S (b[2]); - w[k + 3] = hc_swap32_S (b[3]); - w[k + 4] = hc_swap32_S (c[0]); - w[k + 5] = hc_swap32_S (c[1]); - w[k + 6] = hc_swap32_S (c[2]); - w[k + 7] = hc_swap32_S (c[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // append salt: - const u32 salt_idx = (pw_len * 2) / 4; - const u32 salt_off = (pw_len * 2) & 3; + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; u32 salt_buf[3]; @@ -836,10 +787,9 @@ KERNEL_FQ void m23800_init (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, salt_buf[0] = (salt_buf[0] >> 16); } - w[salt_idx] |= salt_buf[0]; - - w[salt_idx + 1] = salt_buf[1]; - w[salt_idx + 2] = salt_buf[2]; + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; // store initial w[] (pass and salt) in tmps: @@ -866,17 +816,17 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; - u32 w[80] = { 0 }; // 64 byte aligned + u32 w[80] = { 0 }; - for (u32 i = 0; i < 66; i++) // unroll ? + for (u32 i = 0; i < 66; i++) { w[i] = tmps[gid].w[i]; } @@ -1033,11 +983,11 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_ * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; diff --git a/OpenCL/m23900-pure.cl b/OpenCL/m23900-pure.cl index 7d2c6616f..a70d2a0ba 100644 --- a/OpenCL/m23900-pure.cl +++ b/OpenCL/m23900-pure.cl @@ -444,7 +444,7 @@ KERNEL_FQ void m23900_comp (KERN_ATTR_TMPS_ESALT (bestcrypt_tmp_t, bestcrypt_t)) (digest[2] == res[18]) && (digest[3] == res[19])) { - if (atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } diff --git a/OpenCL/m24100-pure.cl b/OpenCL/m24100-pure.cl new file mode 100644 index 000000000..3b2cb60f3 --- /dev/null +++ b/OpenCL/m24100-pure.cl @@ -0,0 +1,366 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#include "inc_hash_sha1.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct mongodb_sha1_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[5]; + u32 out[5]; + +} mongodb_sha1_tmp_t; + +typedef struct mongodb_sha1 +{ + u32 salt[16]; + u32 user[16]; + + u32 user_len; + +} mongodb_sha1_t; + +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24100_init (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + md5_ctx_t md5_ctx; + + md5_init (&md5_ctx); + + md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].user, esalt_bufs[DIGESTS_OFFSET].user_len); + md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len); + + md5_final (&md5_ctx); + + u32 a = md5_ctx.h[0]; + u32 b = md5_ctx.h[1]; + u32 c = md5_ctx.h[2]; + u32 d = md5_ctx.h[3]; + + #define uint_to_hex_lower8(i) l_bin2asc[(i)] + + u32 hex[16] = { 0 }; + + hex[0] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + hex[1] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + hex[2] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + hex[3] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + hex[4] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + hex[5] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + hex[6] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + hex[7] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init (&sha1_hmac_ctx, hex, 32); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + sha1_hmac_update_global (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt, 16); + + for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m24100_loop (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 4; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} + +KERNEL_FQ void m24100_comp (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); + + u32 out[5]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + out[4] = tmps[gid].out[4]; + + // HMAC-SHA1 with "Server Key" salt: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = out[0]; + w0[1] = out[1]; + w0[2] = out[2]; + w0[3] = out[3]; + w1[0] = out[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_64 (&sha1_hmac_ctx, w0, w1, w2, w3); + + w0[0] = 0x53657276; // Serv + w0[1] = 0x6572204b; // er K + w0[2] = 0x65790000; // ey + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, 10); + + sha1_hmac_final (&sha1_hmac_ctx); + + const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0]; + const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1]; + const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2]; + const u32 r3 = sha1_hmac_ctx.opad.h[DGST_R3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24200-pure.cl b/OpenCL/m24200-pure.cl new file mode 100644 index 000000000..0efa80e05 --- /dev/null +++ b/OpenCL/m24200-pure.cl @@ -0,0 +1,353 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct mongodb_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} mongodb_sha256_tmp_t; + +typedef struct mongodb_sha256 +{ + u32 salt[16]; + u32 user[16]; + + u32 user_len; + +} mongodb_sha256_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24200_init (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt, 28); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24200_loop (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24200_comp (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); + + u32 out[8]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + out[4] = tmps[gid].out[4]; + out[5] = tmps[gid].out[5]; + out[6] = tmps[gid].out[6]; + out[7] = tmps[gid].out[7]; + + // HMAC-SHA256 with "Server Key" salt: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = out[0]; + w0[1] = out[1]; + w0[2] = out[2]; + w0[3] = out[3]; + w1[0] = out[4]; + w1[1] = out[5]; + w1[2] = out[6]; + w1[3] = out[7]; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_64 (&sha256_hmac_ctx, w0, w1, w2, w3); + + w0[0] = 0x53657276; // Serv + w0[1] = 0x6572204b; // er K + w0[2] = 0x65790000; // ey + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx, w0, w1, w2, w3, 10); + + sha256_hmac_final (&sha256_hmac_ctx); + + const u32 r0 = sha256_hmac_ctx.opad.h[DGST_R0]; + const u32 r1 = sha256_hmac_ctx.opad.h[DGST_R1]; + const u32 r2 = sha256_hmac_ctx.opad.h[DGST_R2]; + const u32 r3 = sha256_hmac_ctx.opad.h[DGST_R3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24300_a0-optimized.cl b/OpenCL/m24300_a0-optimized.cl new file mode 100644 index 000000000..9c494d9a8 --- /dev/null +++ b/OpenCL/m24300_a0-optimized.cl @@ -0,0 +1,1272 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_m04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24300_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24300_s04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24300_s16 (KERN_ATTR_RULES ()) +{ +} \ No newline at end of file diff --git a/OpenCL/m24300_a0-pure.cl b/OpenCL/m24300_a0-pure.cl new file mode 100644 index 000000000..b919bf279 --- /dev/null +++ b/OpenCL/m24300_a0-pure.cl @@ -0,0 +1,269 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_swap (&ctx1, tmp.i, tmp.pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24300_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_swap (&ctx1, tmp.i, tmp.pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24300_a1-optimized.cl b/OpenCL/m24300_a1-optimized.cl new file mode 100644 index 000000000..21c3ce379 --- /dev/null +++ b/OpenCL/m24300_a1-optimized.cl @@ -0,0 +1,1381 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24300_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24300_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24300_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24300_a1-pure.cl b/OpenCL/m24300_a1-pure.cl new file mode 100644 index 000000000..c6b5254a0 --- /dev/null +++ b/OpenCL/m24300_a1-pure.cl @@ -0,0 +1,263 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_ctx_t ctx1l; + + sha1_init (&ctx1l); + + sha1_update_global_swap (&ctx1l, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx1l; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24300_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_ctx_t ctx1l; + + sha1_init (&ctx1l); + + sha1_update_global_swap (&ctx1l, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx1l; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24300_a3-optimized.cl b/OpenCL/m24300_a3-optimized.cl new file mode 100644 index 000000000..a641a98e5 --- /dev/null +++ b/OpenCL/m24300_a3-optimized.cl @@ -0,0 +1,1589 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +DECLSPEC void m24300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + salt_buf0_t[0] = salt_buf0[0]; + salt_buf0_t[1] = salt_buf0[1]; + salt_buf0_t[2] = salt_buf0[2]; + salt_buf0_t[3] = salt_buf0[3]; + salt_buf1_t[0] = salt_buf1[0]; + salt_buf1_t[1] = salt_buf1[1]; + salt_buf1_t[2] = salt_buf1[2]; + salt_buf1_t[3] = salt_buf1[3]; + salt_buf2_t[0] = salt_buf2[0]; + salt_buf2_t[1] = salt_buf2[1]; + salt_buf2_t[2] = salt_buf2[2]; + salt_buf2_t[3] = salt_buf2[3]; + salt_buf3_t[0] = salt_buf3[0]; + salt_buf3_t[1] = salt_buf3[1]; + salt_buf3_t[2] = salt_buf3[2]; + salt_buf3_t[3] = salt_buf3[3]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_len); + + w0[ 0] |= hc_swap32_S (salt_buf0_t[0]); + w0[ 1] |= hc_swap32_S (salt_buf0_t[1]); + w0[ 2] |= hc_swap32_S (salt_buf0_t[2]); + w0[ 3] |= hc_swap32_S (salt_buf0_t[3]); + w1[ 0] |= hc_swap32_S (salt_buf1_t[0]); + w1[ 1] |= hc_swap32_S (salt_buf1_t[1]); + w1[ 2] |= hc_swap32_S (salt_buf1_t[2]); + w1[ 3] |= hc_swap32_S (salt_buf1_t[3]); + w2[ 0] |= hc_swap32_S (salt_buf2_t[0]); + w2[ 1] |= hc_swap32_S (salt_buf2_t[1]); + w2[ 2] |= hc_swap32_S (salt_buf2_t[2]); + w2[ 3] |= hc_swap32_S (salt_buf2_t[3]); + w3[ 0] |= hc_swap32_S (salt_buf3_t[0]); + w3[ 1] |= hc_swap32_S (salt_buf3_t[1]); + w3[ 2] |= hc_swap32_S (salt_buf3_t[2]); + w3[ 3] |= hc_swap32_S (salt_buf3_t[3]); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * sha1 + */ + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = pw_salt_len * 8; + + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m24300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + salt_buf0_t[0] = salt_buf0[0]; + salt_buf0_t[1] = salt_buf0[1]; + salt_buf0_t[2] = salt_buf0[2]; + salt_buf0_t[3] = salt_buf0[3]; + salt_buf1_t[0] = salt_buf1[0]; + salt_buf1_t[1] = salt_buf1[1]; + salt_buf1_t[2] = salt_buf1[2]; + salt_buf1_t[3] = salt_buf1[3]; + salt_buf2_t[0] = salt_buf2[0]; + salt_buf2_t[1] = salt_buf2[1]; + salt_buf2_t[2] = salt_buf2[2]; + salt_buf2_t[3] = salt_buf2[3]; + salt_buf3_t[0] = salt_buf3[0]; + salt_buf3_t[1] = salt_buf3[1]; + salt_buf3_t[2] = salt_buf3[2]; + salt_buf3_t[3] = salt_buf3[3]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_len); + + w0[ 0] |= hc_swap32_S (salt_buf0_t[0]); + w0[ 1] |= hc_swap32_S (salt_buf0_t[1]); + w0[ 2] |= hc_swap32_S (salt_buf0_t[2]); + w0[ 3] |= hc_swap32_S (salt_buf0_t[3]); + w1[ 0] |= hc_swap32_S (salt_buf1_t[0]); + w1[ 1] |= hc_swap32_S (salt_buf1_t[1]); + w1[ 2] |= hc_swap32_S (salt_buf1_t[2]); + w1[ 3] |= hc_swap32_S (salt_buf1_t[3]); + w2[ 0] |= hc_swap32_S (salt_buf2_t[0]); + w2[ 1] |= hc_swap32_S (salt_buf2_t[1]); + w2[ 2] |= hc_swap32_S (salt_buf2_t[2]); + w2[ 3] |= hc_swap32_S (salt_buf2_t[3]); + w3[ 0] |= hc_swap32_S (salt_buf3_t[0]); + w3[ 1] |= hc_swap32_S (salt_buf3_t[1]); + w3[ 2] |= hc_swap32_S (salt_buf3_t[2]); + w3[ 3] |= hc_swap32_S (salt_buf3_t[3]); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * sha1 + */ + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = pw_salt_len * 8; + + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m24300_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m24300_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m24300_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m24300_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m24300_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m24300_a3-pure.cl b/OpenCL/m24300_a3-pure.cl new file mode 100644 index 000000000..0f70dc9cf --- /dev/null +++ b/OpenCL/m24300_a3-pure.cl @@ -0,0 +1,311 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0lr = w0l | w0r; + + w[0] = w0lr; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + sha1_update_vector (&ctx1, w, pw_len); + + sha1_update_vector (&ctx1, s, salt_len); + + sha1_final_vector (&ctx1); + + const u32x a = ctx1.h[0]; + const u32x b = ctx1.h[1]; + const u32x c = ctx1.h[2]; + const u32x d = ctx1.h[3]; + const u32x e = ctx1.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_vector_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24300_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0lr = w0l | w0r; + + w[0] = w0lr; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + sha1_update_vector (&ctx1, w, pw_len); + + sha1_update_vector (&ctx1, s, salt_len); + + sha1_final_vector (&ctx1); + + const u32x a = ctx1.h[0]; + const u32x b = ctx1.h[1]; + const u32x c = ctx1.h[2]; + const u32x d = ctx1.h[3]; + const u32x e = ctx1.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_vector_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24500-pure.cl b/OpenCL/m24500-pure.cl new file mode 100644 index 000000000..def16b997 --- /dev/null +++ b/OpenCL/m24500-pure.cl @@ -0,0 +1,654 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct telegram_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[24]; + u64 out [24]; + +} telegram_tmp_t; + +typedef struct telegram +{ + u32 data[72]; + +} telegram_t; + +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + w0[0] = h32_from_64 (digest[0]); + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 (digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); +} + +DECLSPEC void sha1_run (u32 *w, u32 *res) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = 0x80000000; + w3[1] = 0; + w3[2] = 0; + w3[3] = 48 * 8; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (w0, w1, w2, w3, digest); + + res[0] = digest[0]; + res[1] = digest[1]; + res[2] = digest[2]; + res[3] = digest[3]; + res[4] = digest[4]; +} + +KERNEL_FQ void m24500_init (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; // 32 + + sha512_ctx_t sha512_ctx; + + sha512_init (&sha512_ctx); + + sha512_update_global (&sha512_ctx, salt_bufs[SALT_POS].salt_buf, salt_len); + sha512_update_global_swap (&sha512_ctx, pws[gid].i, pws[gid].pw_len); + sha512_update_global (&sha512_ctx, salt_bufs[SALT_POS].salt_buf, salt_len); + + sha512_final (&sha512_ctx); + + u32 w[32] = { 0 }; + + w[ 0] = h32_from_64_S (sha512_ctx.h[0]); + w[ 1] = l32_from_64_S (sha512_ctx.h[0]); + w[ 2] = h32_from_64_S (sha512_ctx.h[1]); + w[ 3] = l32_from_64_S (sha512_ctx.h[1]); + w[ 4] = h32_from_64_S (sha512_ctx.h[2]); + w[ 5] = l32_from_64_S (sha512_ctx.h[2]); + w[ 6] = h32_from_64_S (sha512_ctx.h[3]); + w[ 7] = l32_from_64_S (sha512_ctx.h[3]); + w[ 8] = h32_from_64_S (sha512_ctx.h[4]); + w[ 9] = l32_from_64_S (sha512_ctx.h[4]); + w[10] = h32_from_64_S (sha512_ctx.h[5]); + w[11] = l32_from_64_S (sha512_ctx.h[5]); + w[12] = h32_from_64_S (sha512_ctx.h[6]); + w[13] = l32_from_64_S (sha512_ctx.h[6]); + w[14] = h32_from_64_S (sha512_ctx.h[7]); + w[15] = l32_from_64_S (sha512_ctx.h[7]); + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init (&sha512_hmac_ctx, w, 64); + + tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24500_loop (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v (tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 24; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v (tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 (dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 (dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24500_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 data_key[4]; + + data_key[0] = esalt_bufs[DIGESTS_OFFSET].data[0]; + data_key[1] = esalt_bufs[DIGESTS_OFFSET].data[1]; + data_key[2] = esalt_bufs[DIGESTS_OFFSET].data[2]; + data_key[3] = esalt_bufs[DIGESTS_OFFSET].data[3]; + + u32 data_a[12]; + u32 data_b[12]; + u32 data_c[12]; + u32 data_d[12]; + + data_a[ 0] = data_key[0]; + data_a[ 1] = data_key[1]; + data_a[ 2] = data_key[2]; + data_a[ 3] = data_key[3]; + + data_b[ 4] = data_key[0]; + data_b[ 5] = data_key[1]; + data_b[ 6] = data_key[2]; + data_b[ 7] = data_key[3]; + + data_c[ 8] = data_key[0]; + data_c[ 9] = data_key[1]; + data_c[10] = data_key[2]; + data_c[11] = data_key[3]; + + data_d[ 0] = data_key[0]; + data_d[ 1] = data_key[1]; + data_d[ 2] = data_key[2]; + data_d[ 3] = data_key[3]; + + data_a[ 4] = h32_from_64_S (tmps[gid].out[ 1]); // not a bug: out[0] is ignored + data_a[ 5] = l32_from_64_S (tmps[gid].out[ 1]); + data_a[ 6] = h32_from_64_S (tmps[gid].out[ 2]); + data_a[ 7] = l32_from_64_S (tmps[gid].out[ 2]); + data_a[ 8] = h32_from_64_S (tmps[gid].out[ 3]); + data_a[ 9] = l32_from_64_S (tmps[gid].out[ 3]); + data_a[10] = h32_from_64_S (tmps[gid].out[ 4]); + data_a[11] = l32_from_64_S (tmps[gid].out[ 4]); + + data_b[ 0] = h32_from_64_S (tmps[gid].out[ 5]); + data_b[ 1] = l32_from_64_S (tmps[gid].out[ 5]); + data_b[ 2] = h32_from_64_S (tmps[gid].out[ 6]); + data_b[ 3] = l32_from_64_S (tmps[gid].out[ 6]); + + data_b[ 8] = h32_from_64_S (tmps[gid].out[ 7]); + data_b[ 9] = l32_from_64_S (tmps[gid].out[ 7]); + data_b[10] = h32_from_64_S (tmps[gid].out[ 8]); + data_b[11] = l32_from_64_S (tmps[gid].out[ 8]); + + data_c[ 0] = h32_from_64_S (tmps[gid].out[ 9]); + data_c[ 1] = l32_from_64_S (tmps[gid].out[ 9]); + data_c[ 2] = h32_from_64_S (tmps[gid].out[10]); + data_c[ 3] = l32_from_64_S (tmps[gid].out[10]); + data_c[ 4] = h32_from_64_S (tmps[gid].out[11]); + data_c[ 5] = l32_from_64_S (tmps[gid].out[11]); + data_c[ 6] = h32_from_64_S (tmps[gid].out[12]); + data_c[ 7] = l32_from_64_S (tmps[gid].out[12]); + + data_d[ 4] = h32_from_64_S (tmps[gid].out[13]); + data_d[ 5] = l32_from_64_S (tmps[gid].out[13]); + data_d[ 6] = h32_from_64_S (tmps[gid].out[14]); + data_d[ 7] = l32_from_64_S (tmps[gid].out[14]); + data_d[ 8] = h32_from_64_S (tmps[gid].out[15]); + data_d[ 9] = l32_from_64_S (tmps[gid].out[15]); + data_d[10] = h32_from_64_S (tmps[gid].out[16]); + data_d[11] = l32_from_64_S (tmps[gid].out[16]); + + // hash (SHA1 ()) the data_*: + + u32 a[5]; + + sha1_run (data_a, a); + + u32 b[5]; + + sha1_run (data_b, b); + + u32 c[5]; + + sha1_run (data_c, c); + + u32 d[5]; + + sha1_run (data_d, d); + + // set up AES key and AES IV: + + u32 key[8]; + + key[0] = a[0]; + key[1] = a[1]; + key[2] = b[2]; + key[3] = b[3]; + key[4] = b[4]; + key[5] = c[1]; + key[6] = c[2]; + key[7] = c[3]; + + u32 iv[8]; + + iv[0] = a[2]; + iv[1] = a[3]; + iv[2] = a[4]; + iv[3] = b[0]; + iv[4] = b[1]; + iv[5] = c[4]; + iv[6] = d[0]; + iv[7] = d[1]; + + // decrypt with AES-IGE: + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 x_prev[4]; + + x_prev[0] = iv[0]; + x_prev[1] = iv[1]; + x_prev[2] = iv[2]; + x_prev[3] = iv[3]; + + u32 y_prev[4]; + + y_prev[0] = iv[4]; + y_prev[1] = iv[5]; + y_prev[2] = iv[6]; + y_prev[3] = iv[7]; + + u32 out[80] = { 0 }; // 64-byte aligned for SHA1 + + for (int i = 0; i < 68; i += 4) + { + u32 x[4]; + + x[0] = esalt_bufs[DIGESTS_OFFSET].data[4 + i]; + x[1] = esalt_bufs[DIGESTS_OFFSET].data[5 + i]; + x[2] = esalt_bufs[DIGESTS_OFFSET].data[6 + i]; + x[3] = esalt_bufs[DIGESTS_OFFSET].data[7 + i]; + + u32 y[4]; + + y[0] = x[0] ^ y_prev[0]; + y[1] = x[1] ^ y_prev[1]; + y[2] = x[2] ^ y_prev[2]; + y[3] = x[3] ^ y_prev[3]; + + u32 dec[4]; + + AES256_decrypt (ks, y, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + y_prev[0] = dec[0] ^ x_prev[0]; + y_prev[1] = dec[1] ^ x_prev[1]; + y_prev[2] = dec[2] ^ x_prev[2]; + y_prev[3] = dec[3] ^ x_prev[3]; + + out[i + 0] = y_prev[0]; + out[i + 1] = y_prev[1]; + out[i + 2] = y_prev[2]; + out[i + 3] = y_prev[3]; + + x_prev[0] = x[0]; + x_prev[1] = x[1]; + x_prev[2] = x[2]; + x_prev[3] = x[3]; + } + + // final SHA1 checksum of the decrypted data (out): + + sha1_ctx_t ctx; + + sha1_init (&ctx); + sha1_update (&ctx, out, 272); + sha1_final (&ctx); + + const u32 r0 = ctx.h[0]; + const u32 r1 = ctx.h[1]; + const u32 r2 = ctx.h[2]; + const u32 r3 = ctx.h[3]; + + // verify: + + if (r0 == data_key[0] && + r1 == data_key[1] && + r2 == data_key[2] && + r3 == data_key[3]) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + } +} diff --git a/OpenCL/m24610-pure.cl b/OpenCL/m24610-pure.cl new file mode 100644 index 000000000..aad6b6729 --- /dev/null +++ b/OpenCL/m24610-pure.cl @@ -0,0 +1,346 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha1_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[10]; + u32 out[10]; + +} sqlcipher_sha1_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24610_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m24610_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 8; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} + +KERNEL_FQ void m24610_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24620-pure.cl b/OpenCL/m24620-pure.cl new file mode 100644 index 000000000..da67ba36d --- /dev/null +++ b/OpenCL/m24620-pure.cl @@ -0,0 +1,385 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} sqlcipher_sha256_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24620_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24620_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24620_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24630-pure.cl b/OpenCL/m24630-pure.cl new file mode 100644 index 000000000..c54ca9a0b --- /dev/null +++ b/OpenCL/m24630-pure.cl @@ -0,0 +1,441 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha512_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[8]; + u64 out[8]; + +} sqlcipher_sha512_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + w0[0] = h32_from_64 (digest[0]); + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 (digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); +} + +KERNEL_FQ void m24630_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init_global_swap (&sha512_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24630_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v (tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v (tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 (dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 (dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24630_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = h32_from_64_S (tmps[gid].out[0]); + ukey[1] = l32_from_64_S (tmps[gid].out[0]); + ukey[2] = h32_from_64_S (tmps[gid].out[1]); + ukey[3] = l32_from_64_S (tmps[gid].out[1]); + ukey[4] = h32_from_64_S (tmps[gid].out[2]); + ukey[5] = l32_from_64_S (tmps[gid].out[2]); + ukey[6] = h32_from_64_S (tmps[gid].out[3]); + ukey[7] = l32_from_64_S (tmps[gid].out[3]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24700_a0-optimized.cl b/OpenCL/m24700_a0-optimized.cl new file mode 100644 index 000000000..50bc227b3 --- /dev/null +++ b/OpenCL/m24700_a0-optimized.cl @@ -0,0 +1,496 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24700_a0-pure.cl b/OpenCL/m24700_a0-pure.cl new file mode 100644 index 000000000..796eca01d --- /dev/null +++ b/OpenCL/m24700_a0-pure.cl @@ -0,0 +1,143 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24700_a1-optimized.cl b/OpenCL/m24700_a1-optimized.cl new file mode 100644 index 000000000..c9570e955 --- /dev/null +++ b/OpenCL/m24700_a1-optimized.cl @@ -0,0 +1,612 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24700_a1-pure.cl b/OpenCL/m24700_a1-pure.cl new file mode 100644 index 000000000..3e2fbf662 --- /dev/null +++ b/OpenCL/m24700_a1-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24700_a3-optimized.cl b/OpenCL/m24700_a3-optimized.cl new file mode 100644 index 000000000..aaaddf8fb --- /dev/null +++ b/OpenCL/m24700_a3-optimized.cl @@ -0,0 +1,784 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +DECLSPEC void m24700m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = a; + w0_t[1] = b & 0xff; w0_t[1] |= 0x8000; + w0_t[2] = 0; + w0_t[3] = 0; + w1_t[0] = 0; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 5 * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + +DECLSPEC void m24700s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = a; + w0_t[1] = b & 0xff; w0_t[1] |= 0x8000; + w0_t[2] = 0; + w0_t[3] = 0; + w1_t[0] = 0; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 5 * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m24700_a3-pure.cl b/OpenCL/m24700_a3-pure.cl new file mode 100644 index 000000000..9942c4415 --- /dev/null +++ b/OpenCL/m24700_a3-pure.cl @@ -0,0 +1,163 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1] & 0xff; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1] & 0xff; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1] & 0xff; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1] & 0xff; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a0-optimized.cl b/OpenCL/m24800_a0-optimized.cl new file mode 100644 index 000000000..9ef086f7c --- /dev/null +++ b/OpenCL/m24800_a0-optimized.cl @@ -0,0 +1,362 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24800_a0-pure.cl b/OpenCL/m24800_a0-pure.cl new file mode 100644 index 000000000..da2cbc112 --- /dev/null +++ b/OpenCL/m24800_a0-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a1-optimized.cl b/OpenCL/m24800_a1-optimized.cl new file mode 100644 index 000000000..02451900c --- /dev/null +++ b/OpenCL/m24800_a1-optimized.cl @@ -0,0 +1,464 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24800_a1-pure.cl b/OpenCL/m24800_a1-pure.cl new file mode 100644 index 000000000..da321e287 --- /dev/null +++ b/OpenCL/m24800_a1-pure.cl @@ -0,0 +1,185 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a3-optimized.cl b/OpenCL/m24800_a3-optimized.cl new file mode 100644 index 000000000..de5ad3649 --- /dev/null +++ b/OpenCL/m24800_a3-optimized.cl @@ -0,0 +1,612 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +DECLSPEC void m24800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +DECLSPEC void m24800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m24800_a3-pure.cl b/OpenCL/m24800_a3-pure.cl new file mode 100644 index 000000000..9d2bbc0ad --- /dev/null +++ b/OpenCL/m24800_a3-pure.cl @@ -0,0 +1,157 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = hc_swap32_S (pws[gid].i[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | hc_swap32_S (w0r); + + w[0] = w0; + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = hc_swap32_S (pws[gid].i[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | hc_swap32_S (w0r); + + w[0] = w0; + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24900_a0-optimized.cl b/OpenCL/m24900_a0-optimized.cl new file mode 100644 index 000000000..312691021 --- /dev/null +++ b/OpenCL/m24900_a0-optimized.cl @@ -0,0 +1,337 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24900_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24900_a1-optimized.cl b/OpenCL/m24900_a1-optimized.cl new file mode 100644 index 000000000..a25797ee2 --- /dev/null +++ b/OpenCL/m24900_a1-optimized.cl @@ -0,0 +1,454 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24900_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24900_a3-optimized.cl b/OpenCL/m24900_a3-optimized.cl new file mode 100644 index 000000000..dd2d17f69 --- /dev/null +++ b/OpenCL/m24900_a3-optimized.cl @@ -0,0 +1,599 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +DECLSPEC void m24900m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = pw_len * 8; + t3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, t1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +DECLSPEC void m24900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3], + }; + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = pw_len * 8; + t3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, t1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m25300-pure.cl b/OpenCL/m25300-pure.cl new file mode 100644 index 000000000..ef23a0446 --- /dev/null +++ b/OpenCL/m25300-pure.cl @@ -0,0 +1,182 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct office2016_tmp +{ + u64 out[8]; + +} office2016_tmp_t; + +KERNEL_FQ void m25300_init (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + sha512_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha512_update_global_utf16le_swap (&ctx, pws[gid].i, pws[gid].pw_len); + + sha512_final (&ctx); + + tmps[gid].out[0] = ctx.h[0]; + tmps[gid].out[1] = ctx.h[1]; + tmps[gid].out[2] = ctx.h[2]; + tmps[gid].out[3] = ctx.h[3]; + tmps[gid].out[4] = ctx.h[4]; + tmps[gid].out[5] = ctx.h[5]; + tmps[gid].out[6] = ctx.h[6]; + tmps[gid].out[7] = ctx.h[7]; +} + +KERNEL_FQ void m25300_loop (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x t0 = pack64v (tmps, out, gid, 0); + u64x t1 = pack64v (tmps, out, gid, 1); + u64x t2 = pack64v (tmps, out, gid, 2); + u64x t3 = pack64v (tmps, out, gid, 3); + u64x t4 = pack64v (tmps, out, gid, 4); + u64x t5 = pack64v (tmps, out, gid, 5); + u64x t6 = pack64v (tmps, out, gid, 6); + u64x t7 = pack64v (tmps, out, gid, 7); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = 0; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0x80000000; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (64 + 4) * 8; + + for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) + { + w0[0] = h32_from_64 (t0); + w0[1] = l32_from_64 (t0); + w0[2] = h32_from_64 (t1); + w0[3] = l32_from_64 (t1); + w1[0] = h32_from_64 (t2); + w1[1] = l32_from_64 (t2); + w1[2] = h32_from_64 (t3); + w1[3] = l32_from_64 (t3); + w2[0] = h32_from_64 (t4); + w2[1] = l32_from_64 (t4); + w2[2] = h32_from_64 (t5); + w2[3] = l32_from_64 (t5); + w3[0] = h32_from_64 (t6); + w3[1] = l32_from_64 (t6); + w3[2] = h32_from_64 (t7); + w3[3] = l32_from_64 (t7); + w4[0] = hc_swap32 (j); + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + t0 = digest[0]; + t1 = digest[1]; + t2 = digest[2]; + t3 = digest[3]; + t4 = digest[4]; + t5 = digest[5]; + t6 = digest[6]; + t7 = digest[7]; + } + + unpack64v (tmps, out, gid, 0, t0); + unpack64v (tmps, out, gid, 1, t1); + unpack64v (tmps, out, gid, 2, t2); + unpack64v (tmps, out, gid, 3, t3); + unpack64v (tmps, out, gid, 4, t4); + unpack64v (tmps, out, gid, 5, t5); + unpack64v (tmps, out, gid, 6, t6); + unpack64v (tmps, out, gid, 7, t7); +} + +KERNEL_FQ void m25300_comp (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u32 r0 = l32_from_64_S (tmps[gid].out[7]); + const u32 r1 = h32_from_64_S (tmps[gid].out[7]); + const u32 r2 = l32_from_64_S (tmps[gid].out[3]); + const u32 r3 = h32_from_64_S (tmps[gid].out[3]); + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25400-pure.cl b/OpenCL/m25400-pure.cl new file mode 100644 index 000000000..0a0aba524 --- /dev/null +++ b/OpenCL/m25400-pure.cl @@ -0,0 +1,444 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +// TODO use user password as input for md5 of o_digest if no owner password is set +// TODO dynamically add user password including padding to the RC4 input for the computation of the pdf o-value + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_md5.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +CONSTANT_VK u32a padding[8] = +{ + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 +}; + +typedef struct pdf +{ + int V; + int R; + int P; + + int enc_md; + + u32 id_buf[8]; + u32 u_buf[32]; + u32 o_buf[32]; + + int id_len; + int o_len; + int u_len; + + u32 rc4key[2]; + u32 rc4data[2]; + +} pdf_t; + +typedef struct pdf14_tmp +{ + u32 digest[4]; + u32 out[4]; + +} pdf14_tmp_t; + +typedef struct +{ + u8 S[256]; + + u32 wtf_its_faster; + +} RC4_KEY; + +DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = rc4_key->S[i]; + rc4_key->S[i] = rc4_key->S[j]; + rc4_key->S[j] = tmp; +} + +DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) +{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 64; i++) + { + *ptr++ = v; v += a; + } + + u32 j = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 16; i++) + { + u32 idx = i * 16; + + u32 v; + + v = data[0]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[1]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[2]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[3]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + } +} + +DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +{ + #ifdef _unroll + #pragma unroll + #endif + for (u32 k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u8 idx; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 0; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 8; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 16; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 24; + + out[k] = in[k] ^ xor4; + } + + return j; +} + +KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + //const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * shared + */ + + //LOCAL_AS RC4_KEY rc4_keys[64]; + //LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + + u32 P = esalt_bufs[DIGESTS_OFFSET].P; + + u32 id_buf[12]; + + id_buf[ 0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[ 1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[ 2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[ 3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; + + id_buf[ 4] = esalt_bufs[DIGESTS_OFFSET].id_buf[4]; + id_buf[ 5] = esalt_bufs[DIGESTS_OFFSET].id_buf[5]; + id_buf[ 6] = esalt_bufs[DIGESTS_OFFSET].id_buf[6]; + id_buf[ 7] = esalt_bufs[DIGESTS_OFFSET].id_buf[7]; + + id_buf[ 8] = 0; + id_buf[ 9] = 0; + id_buf[10] = 0; + id_buf[11] = 0; + + u32 rc4data[2]; + + rc4data[0] = padding[0]; + rc4data[1] = padding[1]; + + /** + * main init + */ + + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; + + // max length supported by pdf11 is 32 + + w0_t[0] = padding[0]; + w0_t[1] = padding[1]; + w0_t[2] = padding[2]; + w0_t[3] = padding[3]; + w1_t[0] = padding[4]; + w1_t[1] = padding[5]; + w1_t[2] = padding[6]; + w1_t[3] = padding[7]; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 0; + w3_t[3] = 0; + + switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, pw_len); + + // add password + // truncate at 32 is wanted, not a bug! + // add padding + + w0_t[0] |= w0[0]; + w0_t[1] |= w0[1]; + w0_t[2] |= w0[2]; + w0_t[3] |= w0[3]; + w1_t[0] |= w1[0]; + w1_t[1] |= w1[1]; + w1_t[2] |= w1[2]; + w1_t[3] |= w1[3]; + w2_t[0] = 0x80; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 32 * 8; + w3_t[3] = 0; + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (w0_t, w1_t, w2_t, w3_t, digest); + + tmps[gid].digest[0] = digest[0]; + tmps[gid].digest[1] = digest[1]; + tmps[gid].digest[2] = digest[2]; + tmps[gid].digest[3] = digest[3]; + + tmps[gid].out[0] = rc4data[0]; + tmps[gid].out[1] = rc4data[1]; + tmps[gid].out[2] = 0; + tmps[gid].out[3] = 0; +} + +KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * shared + */ + + LOCAL_VK RC4_KEY rc4_keys[64]; + + LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + + /** + * loop + */ + + u32 digest[4]; + + digest[0] = tmps[gid].digest[0]; + digest[1] = tmps[gid].digest[1]; + digest[2] = tmps[gid].digest[2]; + digest[3] = tmps[gid].digest[3]; + + u32 out[4]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + + for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) + { + if (j < 50) + { + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; + + w0_t[0] = digest[0]; + w0_t[1] = digest[1]; + w0_t[2] = digest[2]; + w0_t[3] = digest[3]; + w1_t[0] = 0x80; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 16 * 8; + w3_t[3] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (w0_t, w1_t, w2_t, w3_t, digest); + } + else + { + const u32 x = j - 50; + + const u32 xv = x << 0 + | x << 8 + | x << 16 + | x << 24; + + u32 tmp[4]; + + tmp[0] = digest[0] ^ xv; + tmp[1] = digest[1] ^ xv; + tmp[2] = digest[2] ^ xv; + tmp[3] = digest[3] ^ xv; + + rc4_init_16 (rc4_key, tmp); + + rc4_next_16 (rc4_key, 0, 0, out, out); + } + } + + tmps[gid].digest[0] = digest[0]; + tmps[gid].digest[1] = digest[1]; + tmps[gid].digest[2] = digest[2]; + tmps[gid].digest[3] = digest[3]; + + tmps[gid].out[0] = out[0]; + tmps[gid].out[1] = out[1]; + tmps[gid].out[2] = out[2]; + tmps[gid].out[3] = out[3]; +} + +KERNEL_FQ void m25400_comp (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 r0 = tmps[gid].out[0]; + const u32 r1 = tmps[gid].out[1]; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25500-optimized.cl b/OpenCL/m25500-optimized.cl new file mode 100644 index 000000000..dfc292a26 --- /dev/null +++ b/OpenCL/m25500-optimized.cl @@ -0,0 +1,455 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_aes-gcm.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[16]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m25500_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25500_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key[60] = { 0 }; + + u32 subKey[4] = { 0 }; + + AES256_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (key, subKey, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + const u32 iv[4] = { + esalt_bufs[DIGESTS_OFFSET].iv_buf[0], + esalt_bufs[DIGESTS_OFFSET].iv_buf[1], + esalt_bufs[DIGESTS_OFFSET].iv_buf[2], + esalt_bufs[DIGESTS_OFFSET].iv_buf[3] + }; + + u32 J0[4] = { + iv[0], + iv[1], + iv[2], + 0x00000001 + }; + + // ct + + u32 enc[14] = { 0 }; + + enc[ 0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 0]; + enc[ 1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 1]; + enc[ 2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 2]; + enc[ 3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 3]; + enc[ 4] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 4]; + enc[ 5] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 5]; + enc[ 6] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 6]; + enc[ 7] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 7]; + enc[ 8] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 8]; + enc[ 9] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 9]; + enc[10] = esalt_bufs[DIGESTS_OFFSET].ct_buf[10]; + enc[11] = esalt_bufs[DIGESTS_OFFSET].ct_buf[11]; + enc[12] = esalt_bufs[DIGESTS_OFFSET].ct_buf[12]; + enc[13] = esalt_bufs[DIGESTS_OFFSET].ct_buf[13]; + + u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len; + + u32 S[4] = { 0 }; + + u32 t[4] = { 0 }; + + S[0] ^= enc[0]; + S[1] ^= enc[1]; + S[2] ^= enc[2]; + S[3] ^= enc[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0] ^ enc[4]; + S[1] = t[1] ^ enc[5]; + S[2] = t[2] ^ enc[6]; + S[3] = t[3] ^ enc[7]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0] ^ enc[8]; + S[1] = t[1] ^ enc[9]; + S[2] = t[2] ^ enc[10]; + S[3] = t[3] ^ enc[11]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + t[0] = enc[12]; + t[1] = enc[13]; + t[2] = 0; + t[3] = 0; + + S[0] ^= t[0]; + S[1] ^= t[1]; + S[2] ^= t[2]; + S[3] ^= t[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + u32 len_buf[4] = { 0 }; + + len_buf[0] = 0; + len_buf[3] = enc_len * 8; + + S[0] ^= len_buf[0]; + S[1] ^= len_buf[1]; + S[2] ^= len_buf[2]; + S[3] ^= len_buf[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + J0[3] = 0x00000001; + + u32 T[4] = { 0 }; + + AES256_encrypt (key, J0, T, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* compare tag */ + + const u32 r0 = T[0] ^ S[0]; + const u32 r1 = T[1] ^ S[1]; + const u32 r2 = T[2] ^ S[2]; + const u32 r3 = T[3] ^ S[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25500-pure.cl b/OpenCL/m25500-pure.cl new file mode 100644 index 000000000..56dddb096 --- /dev/null +++ b/OpenCL/m25500-pure.cl @@ -0,0 +1,405 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_aes-gcm.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[16]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m25500_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25500_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key_len = 32 * 8; + + u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; + + AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + const u32 iv[4] = { + esalt_bufs[DIGESTS_OFFSET].iv_buf[0], + esalt_bufs[DIGESTS_OFFSET].iv_buf[1], + esalt_bufs[DIGESTS_OFFSET].iv_buf[2], + esalt_bufs[DIGESTS_OFFSET].iv_buf[3] + }; + + const u32 iv_len = esalt_bufs[DIGESTS_OFFSET].iv_len; + + u32 J0[4] = { 0 }; + + AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); + + // ct + + /* + u32 enc[14] = { 0 }; + + enc[ 0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 0]; + enc[ 1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 1]; + enc[ 2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 2]; + enc[ 3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 3]; + enc[ 4] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 4]; + enc[ 5] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 5]; + enc[ 6] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 6]; + enc[ 7] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 7]; + enc[ 8] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 8]; + enc[ 9] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 9]; + enc[10] = esalt_bufs[DIGESTS_OFFSET].ct_buf[10]; + enc[11] = esalt_bufs[DIGESTS_OFFSET].ct_buf[11]; + enc[12] = esalt_bufs[DIGESTS_OFFSET].ct_buf[12]; + enc[13] = esalt_bufs[DIGESTS_OFFSET].ct_buf[13]; + + u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len; + */ + + /* + // decrypt buffer is not usefull here, skip + u32 dec[14] = { 0 }; + + AES_GCM_GCTR (key, J0, enc, enc_len, dec, s_te0, s_te1, s_te2, s_te3, s_te4); + */ + + u32 T[4] = { 0 }; + u32 S[4] = { 0 }; + + u32 S_len = 16; + u32 aad_buf[4] = { 0 }; + u32 aad_len = 0; + + //AES_GCM_GHASH (subKey, aad_buf, aad_len, enc, enc_len, S); + + AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET].ct_buf, esalt_bufs[DIGESTS_OFFSET].ct_len, S); + + AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* compare tag */ + + const u32 r0 = T[0]; + const u32 r1 = T[1]; + const u32 r2 = T[2]; + const u32 r3 = T[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25600-pure.cl b/OpenCL/m25600-pure.cl new file mode 100644 index 000000000..e490679a7 --- /dev/null +++ b/OpenCL/m25600-pure.cl @@ -0,0 +1,962 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct bcrypt_tmp +{ + u32 E[18]; + + u32 P[18]; + + u32 S0[256]; + u32 S1[256]; + u32 S2[256]; + u32 S3[256]; + +} bcrypt_tmp_t; + +// http://www.schneier.com/code/constants.txt + +CONSTANT_VK u32a c_sbox0[256] = +{ + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a +}; + +CONSTANT_VK u32a c_sbox1[256] = +{ + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 +}; + +CONSTANT_VK u32a c_sbox2[256] = +{ + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 +}; + +CONSTANT_VK u32a c_sbox3[256] = +{ + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 +}; + +CONSTANT_VK u32a c_pbox[18] = +{ + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b +}; + +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = S0[r0]; \ + tmp += S1[r1]; \ + tmp ^= S2[r2]; \ + tmp += S3[r3]; \ + \ + (R) ^= tmp ^ P[(N)]; \ +} + +#define BF_ENCRYPT(L,R) \ +{ \ + L ^= P[0]; \ + \ + BF_ROUND (L, R, 1); \ + BF_ROUND (R, L, 2); \ + BF_ROUND (L, R, 3); \ + BF_ROUND (R, L, 4); \ + BF_ROUND (L, R, 5); \ + BF_ROUND (R, L, 6); \ + BF_ROUND (L, R, 7); \ + BF_ROUND (R, L, 8); \ + BF_ROUND (L, R, 9); \ + BF_ROUND (R, L, 10); \ + BF_ROUND (L, R, 11); \ + BF_ROUND (R, L, 12); \ + BF_ROUND (L, R, 13); \ + BF_ROUND (R, L, 14); \ + BF_ROUND (L, R, 15); \ + BF_ROUND (R, L, 16); \ + \ + u32 tmp; \ + \ + tmp = R; \ + R = L; \ + L = tmp; \ + \ + L ^= P[17]; \ +} + +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 lm[]; +#endif + +DECLSPEC void expand_key (u32 *E, u32 *W, const int len) +{ + u8 *E_ptr = (u8 *) E; + u8 *W_ptr = (u8 *) W; + + for (int pos = 0; pos < 72; pos++) // pos++ is not a bug, we actually want that zero byte here + { + const int left = 72 - pos; + + const int sz = (len < left) ? len : left; // should be MIN() + + for (int i = 0; i < sz; i++) + { + E_ptr[pos + i] = W_ptr[i]; + } + + pos += sz; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_init (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + #ifdef IS_CUDA + + if (gid >= gid_max) return; + + u32 l_bin2asc[256]; + + for (u32 i = 0; i < 256; i++) + + #else + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + + #endif + + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + #ifndef IS_CUDA + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + #endif + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, w, pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + + const u32 pw_len_md5 = 32; + + w[ 0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w[ 1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w[ 2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w[ 3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w[ 4] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w[ 5] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w[ 6] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w[ 7] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + w[16] = 0; + w[17] = 0; + + u32 E[18] = { 0 }; + + expand_key (E, w, pw_len_md5); + + E[ 0] = hc_swap32_S (E[ 0]); + E[ 1] = hc_swap32_S (E[ 1]); + E[ 2] = hc_swap32_S (E[ 2]); + E[ 3] = hc_swap32_S (E[ 3]); + E[ 4] = hc_swap32_S (E[ 4]); + E[ 5] = hc_swap32_S (E[ 5]); + E[ 6] = hc_swap32_S (E[ 6]); + E[ 7] = hc_swap32_S (E[ 7]); + E[ 8] = hc_swap32_S (E[ 8]); + E[ 9] = hc_swap32_S (E[ 9]); + E[10] = hc_swap32_S (E[10]); + E[11] = hc_swap32_S (E[11]); + E[12] = hc_swap32_S (E[12]); + E[13] = hc_swap32_S (E[13]); + E[14] = hc_swap32_S (E[14]); + E[15] = hc_swap32_S (E[15]); + E[16] = hc_swap32_S (E[16]); + E[17] = hc_swap32_S (E[17]); + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].E[i] = E[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = c_pbox[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = c_sbox0[i]; + S1[i] = c_sbox1[i]; + S2[i] = c_sbox2[i]; + S3[i] = c_sbox3[i]; + } + + // expandstate + + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + u32 L0 = 0; + u32 R0 = 0; + + for (u32 i = 0; i < 18; i += 2) + { + L0 ^= salt_buf[(i & 2) + 0]; + R0 ^= salt_buf[(i & 2) + 1]; + + BF_ENCRYPT (L0, R0); + + P[i + 0] = L0; + P[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S0[i + 2] = L0; + S0[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S1[i + 2] = L0; + S1[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S2[i + 2] = L0; + S2[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S3[i + 2] = L0; + S3[i + 3] = R0; + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = S0[i]; + tmps[gid].S1[i] = S1[i]; + tmps[gid].S2[i] = S2[i]; + tmps[gid].S3[i] = S3[i]; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_loop (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 E[18]; + + for (u32 i = 0; i < 18; i++) + { + E[i] = tmps[gid].E[i]; + } + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = tmps[gid].S0[i]; + S1[i] = tmps[gid].S1[i]; + S2[i] = tmps[gid].S2[i]; + S3[i] = tmps[gid].S3[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * main loop + */ + + u32 L0; + u32 R0; + + for (u32 i = 0; i < loop_cnt; i++) + { + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + } + + P[ 0] ^= salt_buf[0]; + P[ 1] ^= salt_buf[1]; + P[ 2] ^= salt_buf[2]; + P[ 3] ^= salt_buf[3]; + P[ 4] ^= salt_buf[0]; + P[ 5] ^= salt_buf[1]; + P[ 6] ^= salt_buf[2]; + P[ 7] ^= salt_buf[3]; + P[ 8] ^= salt_buf[0]; + P[ 9] ^= salt_buf[1]; + P[10] ^= salt_buf[2]; + P[11] ^= salt_buf[3]; + P[12] ^= salt_buf[0]; + P[13] ^= salt_buf[1]; + P[14] ^= salt_buf[2]; + P[15] ^= salt_buf[3]; + P[16] ^= salt_buf[0]; + P[17] ^= salt_buf[1]; + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + } + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = S0[i]; + tmps[gid].S1[i] = S1[i]; + tmps[gid].S2[i] = S2[i]; + tmps[gid].S3[i] = S3[i]; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_comp (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = tmps[gid].S0[i]; + S1[i] = tmps[gid].S1[i]; + S2[i] = tmps[gid].S2[i]; + S3[i] = tmps[gid].S3[i]; + } + + /** + * main + */ + + u32 L0; + u32 R0; + + L0 = BCRYPTM_0; + R0 = BCRYPTM_1; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r0 = L0; + const u32 r1 = R0; + + L0 = BCRYPTM_2; + R0 = BCRYPTM_3; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r2 = L0; + const u32 r3 = R0; + + /* + e = L0; + f = R0; + + f &= ~0xff; // its just 23 not 24 ! + */ + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25700_a0-optimized.cl b/OpenCL/m25700_a0-optimized.cl new file mode 100644 index 000000000..bbd9747e2 --- /dev/null +++ b/OpenCL/m25700_a0-optimized.cl @@ -0,0 +1,199 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#endif + +DECLSPEC u32 MurmurHash (const u32 seed, const u32 *w, const int pw_len) +{ + u32 hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + int i; + int j; + + for (i = 0, j = 0; i < pw_len - 3; i += 4, j += 1) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w[16] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w + 0, w + 4); + + u32x hash = MurmurHash (seed, w, out_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w[16] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w + 0, w + 4); + + u32x hash = MurmurHash (seed, w, out_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m25700_a1-optimized.cl b/OpenCL/m25700_a1-optimized.cl new file mode 100644 index 000000000..2d83f0cad --- /dev/null +++ b/OpenCL/m25700_a1-optimized.cl @@ -0,0 +1,310 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//too much register pressure +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#endif + +DECLSPEC u32 MurmurHash (const u32 seed, const u32 *w, const int pw_len) +{ + u32 hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + int i; + int j; + + for (i = 0, j = 0; i < pw_len - 3; i += 4, j += 1) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w[16]; + + w[ 0] = wordl0[0] | wordr0[0]; + w[ 1] = wordl0[1] | wordr0[1]; + w[ 2] = wordl0[2] | wordr0[2]; + w[ 3] = wordl0[3] | wordr0[3]; + w[ 4] = wordl1[0] | wordr1[0]; + w[ 5] = wordl1[1] | wordr1[1]; + w[ 6] = wordl1[2] | wordr1[2]; + w[ 7] = wordl1[3] | wordr1[3]; + w[ 8] = wordl2[0] | wordr2[0]; + w[ 9] = wordl2[1] | wordr2[1]; + w[10] = wordl2[2] | wordr2[2]; + w[11] = wordl2[3] | wordr2[3]; + w[12] = wordl3[0] | wordr3[0]; + w[13] = wordl3[1] | wordr3[1]; + w[14] = wordl3[2] | wordr3[2]; + w[15] = wordl3[3] | wordr3[3]; + + const u32 r = MurmurHash (seed, w, pw_len); + + const u32 z = 0; + + COMPARE_M_SIMD (r, z, z, z); + } +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w[16]; + + w[ 0] = wordl0[0] | wordr0[0]; + w[ 1] = wordl0[1] | wordr0[1]; + w[ 2] = wordl0[2] | wordr0[2]; + w[ 3] = wordl0[3] | wordr0[3]; + w[ 4] = wordl1[0] | wordr1[0]; + w[ 5] = wordl1[1] | wordr1[1]; + w[ 6] = wordl1[2] | wordr1[2]; + w[ 7] = wordl1[3] | wordr1[3]; + w[ 8] = wordl2[0] | wordr2[0]; + w[ 9] = wordl2[1] | wordr2[1]; + w[10] = wordl2[2] | wordr2[2]; + w[11] = wordl2[3] | wordr2[3]; + w[12] = wordl3[0] | wordr3[0]; + w[13] = wordl3[1] | wordr3[1]; + w[14] = wordl3[2] | wordr3[2]; + w[15] = wordl3[3] | wordr3[3]; + + const u32 r = MurmurHash (seed, w, pw_len); + + const u32 z = 0; + + COMPARE_S_SIMD (r, z, z, z); + } +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m25700_a3-optimized.cl b/OpenCL/m25700_a3-optimized.cl new file mode 100644 index 000000000..2cc5455ee --- /dev/null +++ b/OpenCL/m25700_a3-optimized.cl @@ -0,0 +1,392 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +DECLSPEC u32x MurmurHash_w0 (const u32 seed, const u32x w0, const u32 *w, const int pw_len) +{ + u32x hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + u32x tmp = w0; + + if (pw_len >= 4) + { + hash += w0; + hash *= M; + hash ^= hash >> R; + + int i; + int j; + + for (i = 4, j = 1; i < pw_len - 3; i += 4, j += 1) + { + tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + } + else + { + if (pw_len & 3) + { + tmp = w0; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +DECLSPEC void m25700m (const u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x hash = MurmurHash_w0 (seed, w0, w, pw_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m25700s (const u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x hash = MurmurHash_w0 (seed, w0, w, pw_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m25800-pure.cl b/OpenCL/m25800-pure.cl new file mode 100644 index 000000000..4205d12a0 --- /dev/null +++ b/OpenCL/m25800-pure.cl @@ -0,0 +1,965 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct bcrypt_tmp +{ + u32 E[18]; + + u32 P[18]; + + u32 S0[256]; + u32 S1[256]; + u32 S2[256]; + u32 S3[256]; + +} bcrypt_tmp_t; + +// http://www.schneier.com/code/constants.txt + +CONSTANT_VK u32a c_sbox0[256] = +{ + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a +}; + +CONSTANT_VK u32a c_sbox1[256] = +{ + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 +}; + +CONSTANT_VK u32a c_sbox2[256] = +{ + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 +}; + +CONSTANT_VK u32a c_sbox3[256] = +{ + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 +}; + +CONSTANT_VK u32a c_pbox[18] = +{ + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b +}; + +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = S0[r0]; \ + tmp += S1[r1]; \ + tmp ^= S2[r2]; \ + tmp += S3[r3]; \ + \ + (R) ^= tmp ^ P[(N)]; \ +} + +#define BF_ENCRYPT(L,R) \ +{ \ + L ^= P[0]; \ + \ + BF_ROUND (L, R, 1); \ + BF_ROUND (R, L, 2); \ + BF_ROUND (L, R, 3); \ + BF_ROUND (R, L, 4); \ + BF_ROUND (L, R, 5); \ + BF_ROUND (R, L, 6); \ + BF_ROUND (L, R, 7); \ + BF_ROUND (R, L, 8); \ + BF_ROUND (L, R, 9); \ + BF_ROUND (R, L, 10); \ + BF_ROUND (L, R, 11); \ + BF_ROUND (R, L, 12); \ + BF_ROUND (L, R, 13); \ + BF_ROUND (R, L, 14); \ + BF_ROUND (L, R, 15); \ + BF_ROUND (R, L, 16); \ + \ + u32 tmp; \ + \ + tmp = R; \ + R = L; \ + L = tmp; \ + \ + L ^= P[17]; \ +} + +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 lm[]; +#endif + +DECLSPEC void expand_key (u32 *E, u32 *W, const int len) +{ + u8 *E_ptr = (u8 *) E; + u8 *W_ptr = (u8 *) W; + + for (int pos = 0; pos < 72; pos++) // pos++ is not a bug, we actually want that zero byte here + { + const int left = 72 - pos; + + const int sz = (len < left) ? len : left; // should be MIN() + + for (int i = 0; i < sz; i++) + { + E_ptr[pos + i] = W_ptr[i]; + } + + pos += sz; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_init (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + #ifdef IS_CUDA + + if (gid >= gid_max) return; + + u32 l_bin2asc[256]; + + for (u32 i = 0; i < 256; i++) + + #else + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + + #endif + + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + #ifndef IS_CUDA + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + #endif + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + + const u32 pw_len_sha1 = 40; + + w[ 0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + w[ 1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + w[ 2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + w[ 3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + w[ 4] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + w[ 5] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + w[ 6] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + w[ 7] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + w[ 8] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + w[ 9] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + w[16] = 0; + w[17] = 0; + + u32 E[18] = { 0 }; + + expand_key (E, w, pw_len_sha1); + + E[ 0] = hc_swap32_S (E[ 0]); + E[ 1] = hc_swap32_S (E[ 1]); + E[ 2] = hc_swap32_S (E[ 2]); + E[ 3] = hc_swap32_S (E[ 3]); + E[ 4] = hc_swap32_S (E[ 4]); + E[ 5] = hc_swap32_S (E[ 5]); + E[ 6] = hc_swap32_S (E[ 6]); + E[ 7] = hc_swap32_S (E[ 7]); + E[ 8] = hc_swap32_S (E[ 8]); + E[ 9] = hc_swap32_S (E[ 9]); + E[10] = hc_swap32_S (E[10]); + E[11] = hc_swap32_S (E[11]); + E[12] = hc_swap32_S (E[12]); + E[13] = hc_swap32_S (E[13]); + E[14] = hc_swap32_S (E[14]); + E[15] = hc_swap32_S (E[15]); + E[16] = hc_swap32_S (E[16]); + E[17] = hc_swap32_S (E[17]); + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].E[i] = E[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = c_pbox[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = c_sbox0[i]; + S1[i] = c_sbox1[i]; + S2[i] = c_sbox2[i]; + S3[i] = c_sbox3[i]; + } + + // expandstate + + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + u32 L0 = 0; + u32 R0 = 0; + + for (u32 i = 0; i < 18; i += 2) + { + L0 ^= salt_buf[(i & 2) + 0]; + R0 ^= salt_buf[(i & 2) + 1]; + + BF_ENCRYPT (L0, R0); + + P[i + 0] = L0; + P[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S0[i + 2] = L0; + S0[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S1[i + 2] = L0; + S1[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S2[i + 2] = L0; + S2[i + 3] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + S3[i + 2] = L0; + S3[i + 3] = R0; + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = S0[i]; + tmps[gid].S1[i] = S1[i]; + tmps[gid].S2[i] = S2[i]; + tmps[gid].S3[i] = S3[i]; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_loop (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 E[18]; + + for (u32 i = 0; i < 18; i++) + { + E[i] = tmps[gid].E[i]; + } + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = tmps[gid].S0[i]; + S1[i] = tmps[gid].S1[i]; + S2[i] = tmps[gid].S2[i]; + S3[i] = tmps[gid].S3[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * main loop + */ + + u32 L0; + u32 R0; + + for (u32 i = 0; i < loop_cnt; i++) + { + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + } + + P[ 0] ^= salt_buf[0]; + P[ 1] ^= salt_buf[1]; + P[ 2] ^= salt_buf[2]; + P[ 3] ^= salt_buf[3]; + P[ 4] ^= salt_buf[0]; + P[ 5] ^= salt_buf[1]; + P[ 6] ^= salt_buf[2]; + P[ 7] ^= salt_buf[3]; + P[ 8] ^= salt_buf[0]; + P[ 9] ^= salt_buf[1]; + P[10] ^= salt_buf[2]; + P[11] ^= salt_buf[3]; + P[12] ^= salt_buf[0]; + P[13] ^= salt_buf[1]; + P[14] ^= salt_buf[2]; + P[15] ^= salt_buf[3]; + P[16] ^= salt_buf[0]; + P[17] ^= salt_buf[1]; + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S0[i + 0] = L0; + S0[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S1[i + 0] = L0; + S1[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S2[i + 0] = L0; + S2[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + S3[i + 0] = L0; + S3[i + 1] = R0; + } + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = S0[i]; + tmps[gid].S1[i] = S1[i]; + tmps[gid].S2[i] = S2[i]; + tmps[gid].S3[i] = S3[i]; + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_comp (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; + LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; + LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; + LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + S0[i] = tmps[gid].S0[i]; + S1[i] = tmps[gid].S1[i]; + S2[i] = tmps[gid].S2[i]; + S3[i] = tmps[gid].S3[i]; + } + + /** + * main + */ + + u32 L0; + u32 R0; + + L0 = BCRYPTM_0; + R0 = BCRYPTM_1; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r0 = L0; + const u32 r1 = R0; + + L0 = BCRYPTM_2; + R0 = BCRYPTM_3; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r2 = L0; + const u32 r3 = R0; + + /* + e = L0; + f = R0; + + f &= ~0xff; // its just 23 not 24 ! + */ + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25900-pure.cl b/OpenCL/m25900-pure.cl new file mode 100644 index 000000000..36ecd286a --- /dev/null +++ b/OpenCL/m25900-pure.cl @@ -0,0 +1,423 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct blocks +{ + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + +} blocks_t; + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 in_s[4]; + + in_s[0] = in[0]; + in_s[1] = in[1]; + in_s[2] = in[2]; + in_s[3] = in[3]; + + in_s[0] ^= aes_iv[0]; + in_s[1] ^= aes_iv[1]; + in_s[2] ^= aes_iv[2]; + in_s[3] ^= aes_iv[3]; + + aes128_encrypt (aes_ks, in_s, out, s_te0, s_te1, s_te2, s_te3, s_te4); + + aes_iv[0] = out[0]; + aes_iv[1] = out[1]; + aes_iv[2] = out[2]; + aes_iv[3] = out[3]; +} + +KERNEL_FQ void m25900_init(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap(&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap(&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64(&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final(&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + const u64 gid = get_global_id(0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + const u64 lid = get_local_id(0); + const u64 lsz = get_local_size(0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS(); + + #else + + CONSTANT_AS u32a* s_td0 = td0; + CONSTANT_AS u32a* s_td1 = td1; + CONSTANT_AS u32a* s_td2 = td2; + CONSTANT_AS u32a* s_td3 = td3; + CONSTANT_AS u32a* s_td4 = td4; + + CONSTANT_AS u32a* s_te0 = te0; + CONSTANT_AS u32a* s_te1 = te1; + CONSTANT_AS u32a* s_te2 = te2; + CONSTANT_AS u32a* s_te3 = te3; + CONSTANT_AS u32a* s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 key[4]; + + key[0] = tmps[gid].out[DGST_R0]; + key[1] = tmps[gid].out[DGST_R1]; + key[2] = tmps[gid].out[DGST_R2]; + key[3] = tmps[gid].out[DGST_R3]; + + u32 aes_ks[44]; + + AES128_set_encrypt_key (aes_ks, key, s_te0, s_te1, s_te2, s_te3); + + u32 b0[4] = { 0 }; + + u32 aes_cbc_iv[4] = { 0 }; + + u32 yn[4]; + + const u32 digest_pos = loop_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; + + u32 b1[4]; + + b1[0] = esalt_bufs[digest_cur].b1[0]; + b1[1] = esalt_bufs[digest_cur].b1[1]; + b1[2] = esalt_bufs[digest_cur].b1[2]; + b1[3] = esalt_bufs[digest_cur].b1[3]; + + u32 b2[4]; + + b2[0] = esalt_bufs[digest_cur].b2[0]; + b2[1] = esalt_bufs[digest_cur].b2[1]; + b2[2] = esalt_bufs[digest_cur].b2[2]; + b2[3] = esalt_bufs[digest_cur].b2[3]; + + u32 b3[4]; + + b3[0] = esalt_bufs[digest_cur].b3[0]; + b3[1] = esalt_bufs[digest_cur].b3[1]; + b3[2] = esalt_bufs[digest_cur].b3[2]; + b3[3] = esalt_bufs[digest_cur].b3[3]; + + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + + u32 nonce[4]; + + nonce[0] = 0; + nonce[1] = 0; + nonce[2] = 0; + nonce[3] = 0x00ff0000; // already swapped + + u32 s0[4]; + + aes128_encrypt(aes_ks, nonce, s0, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = yn[0] ^ s0[0]; + const u32 r1 = yn[1] ^ s0[1]; + const u32 r2 = yn[2] ^ s0[2]; + const u32 r3 = yn[3] ^ s0[3]; + +#define il_pos 0 + +#ifdef KERNEL_STATIC +#include COMPARE_M +#endif +} diff --git a/OpenCL/m26000_a0-pure.cl b/OpenCL/m26000_a0-pure.cl new file mode 100644 index 000000000..c702639fc --- /dev/null +++ b/OpenCL/m26000_a0-pure.cl @@ -0,0 +1,720 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26000_a1-pure.cl b/OpenCL/m26000_a1-pure.cl new file mode 100644 index 000000000..c6556965b --- /dev/null +++ b/OpenCL/m26000_a1-pure.cl @@ -0,0 +1,768 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, c, pw_len + comb_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, c, pw_len + comb_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26000_a3-pure.cl b/OpenCL/m26000_a3-pure.cl new file mode 100644 index 000000000..b02d4a077 --- /dev/null +++ b/OpenCL/m26000_a3-pure.cl @@ -0,0 +1,740 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_VECTOR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32 w0lr = w0l | w0r; + + w[0] = w0lr; + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_VECTOR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32 w0lr = w0l | w0r; + + w[0] = w0lr; + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26100-pure.cl b/OpenCL/m26100-pure.cl new file mode 100644 index 000000000..aee82bea4 --- /dev/null +++ b/OpenCL/m26100-pure.cl @@ -0,0 +1,428 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct mozilla_aes_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} mozilla_aes_tmp_t; + +typedef struct mozilla_aes +{ + u32 iv_buf[4]; + u32 ct_buf[4]; + +} mozilla_aes_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26100_init (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + // there's some data in salt_buf[8] and onwards so we need to move this to local variable + + u32 gs[16] = { 0 }; + + gs[0] = salt_bufs[DIGESTS_OFFSET].salt_buf[0]; + gs[1] = salt_bufs[DIGESTS_OFFSET].salt_buf[1]; + gs[2] = salt_bufs[DIGESTS_OFFSET].salt_buf[2]; + gs[3] = salt_bufs[DIGESTS_OFFSET].salt_buf[3]; + gs[4] = salt_bufs[DIGESTS_OFFSET].salt_buf[4]; + + sha1_update_swap (&ctx, gs, 20); + sha1_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); + + sha1_final (&ctx); + + u32 ek[16] = { 0 }; + + ek[0] = ctx.h[0]; + ek[1] = ctx.h[1]; + ek[2] = ctx.h[2]; + ek[3] = ctx.h[3]; + ek[4] = ctx.h[4]; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init (&sha256_hmac_ctx, ek, 20); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + // accessing esalt from _init can lead to false negatives because, + // but we know the global salt is unique for each entry so its fine + + u32 es[16] = { 0 }; + + es[0] = salt_bufs[DIGESTS_OFFSET].salt_buf[ 8]; + es[1] = salt_bufs[DIGESTS_OFFSET].salt_buf[ 9]; + es[2] = salt_bufs[DIGESTS_OFFSET].salt_buf[10]; + es[3] = salt_bufs[DIGESTS_OFFSET].salt_buf[11]; + es[4] = salt_bufs[DIGESTS_OFFSET].salt_buf[12]; + es[5] = salt_bufs[DIGESTS_OFFSET].salt_buf[13]; + es[6] = salt_bufs[DIGESTS_OFFSET].salt_buf[14]; + es[7] = salt_bufs[DIGESTS_OFFSET].salt_buf[15]; + + sha256_hmac_update_swap (&sha256_hmac_ctx, es, 32); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m26100_loop (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m26100_comp (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 ct_buf[4]; + + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + ct_buf[2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf[3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + u32 pt_buf[4]; + + aes256_decrypt (ks, ct_buf, pt_buf, s_td0, s_td1, s_td2, s_td3, s_td4); + + pt_buf[0] ^= iv_buf[0]; + pt_buf[1] ^= iv_buf[1]; + pt_buf[2] ^= iv_buf[2]; + pt_buf[3] ^= iv_buf[3]; + + // password-check\x02\x02 + + if (pt_buf[0] != 0x73736170) return; + if (pt_buf[1] != 0x64726f77) return; + if (pt_buf[2] != 0x6568632d) return; + if (pt_buf[3] != 0x02026b63) return; + + const u32 r0 = ct_buf[0]; + const u32 r1 = ct_buf[1]; + const u32 r2 = ct_buf[2]; + const u32 r3 = ct_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/shared.cl b/OpenCL/shared.cl index 47c37c3c6..a6ae38988 100644 --- a/OpenCL/shared.cl +++ b/OpenCL/shared.cl @@ -212,3 +212,24 @@ KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max) buf[gid] = pw; } + +KERNEL_FQ void gpu_utf8_to_utf16 (GLOBAL_AS pw_t *pws_buf, const u64 gid_max) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + pw_t pw_in = pws_buf[gid]; + + pw_t pw_out; + + for (int i = 0; i < 64; i++) pw_out.i[i] = 0; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + pw_out.pw_len = hc_enc_next (&hc_enc, pw_in.i, pw_in.pw_len, 256, pw_out.i, 256); + + pws_buf[gid] = pw_out; +} diff --git a/deps/LZMA-SDK/C/7z.h b/deps/LZMA-SDK/C/7z.h index 82813c298..969523cd3 100644 --- a/deps/LZMA-SDK/C/7z.h +++ b/deps/LZMA-SDK/C/7z.h @@ -1,5 +1,5 @@ /* 7z.h -- 7z interface -2017-04-03 : Igor Pavlov : Public domain */ +2018-07-02 : Igor Pavlov : Public domain */ #ifndef __7Z_H #define __7Z_H @@ -91,6 +91,8 @@ typedef struct UInt64 *CoderUnpackSizes; // for all coders in all folders Byte *CodersData; + + UInt64 RangeLimit; } CSzAr; UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); diff --git a/deps/LZMA-SDK/C/7zArcIn.c b/deps/LZMA-SDK/C/7zArcIn.c index 68cc12ff4..7ccc72101 100644 --- a/deps/LZMA-SDK/C/7zArcIn.c +++ b/deps/LZMA-SDK/C/7zArcIn.c @@ -1,5 +1,5 @@ /* 7zArcIn.c -- 7z Input functions -2018-12-31 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) return SZ_OK; } -void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) #define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; } -void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p) p->CoderUnpackSizes = NULL; p->CodersData = NULL; + + p->RangeLimit = 0; } static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc) @@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) return SZ_ERROR_ARCHIVE; if (propsSize >= 0x80) return SZ_ERROR_UNSUPPORTED; - coder->PropsOffset = sd->Data - dataStart; + coder->PropsOffset = (size_t)(sd->Data - dataStart); coder->PropsSize = (Byte)propsSize; sd->Data += (size_t)propsSize; sd->Size -= (size_t)propsSize; @@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p, { UInt32 numCoders, ci, numInStreams = 0; - p->FoCodersOffsets[fo] = sd.Data - startBufPtr; + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); RINOK(SzReadNumber32(&sd, &numCoders)); if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) @@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p, p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; { - size_t dataSize = sd.Data - startBufPtr; + const size_t dataSize = (size_t)(sd.Data - startBufPtr); p->FoStartPackStreamIndex[fo] = packStreamIndex; p->FoCodersOffsets[fo] = dataSize; MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc); @@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) numSubDigests += numStreams; } - ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data; + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); continue; } if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) @@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdSizes.Data = sd->Data; RINOK(SkipNumbers(sd, numUnpackSizesInData)); - ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data; + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); RINOK(ReadID(sd, &type)); } @@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdCRCs.Data = sd->Data; RINOK(SkipBitUi32s(sd, numSubDigests)); - ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data; + ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); } else { @@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p, if (type == k7zIdPackInfo) { RINOK(ReadNumber(sd, dataOffset)); + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; RINOK(ReadPackInfo(p, sd, alloc)); + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + return SZ_ERROR_ARCHIVE; RINOK(ReadID(sd, &type)); } if (type == k7zIdUnpackInfo) @@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size return SZ_ERROR_ARCHIVE; for (p = data + pos; #ifdef _WIN32 - *(const UInt16 *)p != 0 + *(const UInt16 *)(const void *)p != 0 #else p[0] != 0 || p[1] != 0 #endif ; p += 2); - pos = p - data + 2; + pos = (size_t)(p - data) + 2; *offsets++ = (pos >> 1); } while (--numFiles); @@ -1133,6 +1139,8 @@ static SRes SzReadHeader2( SRes res; SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX, p->startPosAfterHeader, &tempAr, allocTemp); *numTempBufs = tempAr.NumFolders; @@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2( nextHeaderSize = GetUi64(header + 20); nextHeaderCRC = GetUi32(header + 28); - p->startPosAfterHeader = startArcPos + k7zStartHeaderSize; + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) return SZ_ERROR_CRC; + p->db.RangeLimit = nextHeaderOffset; + nextHeaderSizeT = (size_t)nextHeaderSize; if (nextHeaderSizeT != nextHeaderSize) return SZ_ERROR_MEM; @@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2( { Int64 pos = 0; RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)); - if ((UInt64)pos < startArcPos + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) return SZ_ERROR_INPUT_EOF; } - RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset)); + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)); if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) return SZ_ERROR_MEM; @@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2( Buf_Init(&tempBuf); SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); SzAr_Free(&tempAr, allocTemp); diff --git a/deps/LZMA-SDK/C/7zCrc.c b/deps/LZMA-SDK/C/7zCrc.c index 40ab75952..c7ec353d6 100644 --- a/deps/LZMA-SDK/C/7zCrc.c +++ b/deps/LZMA-SDK/C/7zCrc.c @@ -1,5 +1,5 @@ /* 7zCrc.c -- CRC32 init -2017-06-06 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -26,8 +26,20 @@ typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); +extern CRC_FUNC g_CrcUpdateT4; +CRC_FUNC g_CrcUpdateT4; +extern +CRC_FUNC g_CrcUpdateT8; CRC_FUNC g_CrcUpdateT8; +extern +CRC_FUNC g_CrcUpdateT0_32; +CRC_FUNC g_CrcUpdateT0_32; +extern +CRC_FUNC g_CrcUpdateT0_64; +CRC_FUNC g_CrcUpdateT0_64; +extern +CRC_FUNC g_CrcUpdate; CRC_FUNC g_CrcUpdate; UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; @@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U return v; } + +/* ---------- hardware CRC ---------- */ + +#ifdef MY_CPU_LE + +#if defined(MY_CPU_ARM_OR_ARM64) + +// #pragma message("ARM*") + + #if defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #define USE_ARM64_CRC + #endif + #endif + #elif (defined(__clang__) && (__clang_major__ >= 3)) \ + || (defined(__GNUC__) && (__GNUC__ > 4)) + #if !defined(__ARM_FEATURE_CRC32) + #define __ARM_FEATURE_CRC32 1 + #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) + #endif + #endif + #if defined(__ARM_FEATURE_CRC32) + #define USE_ARM64_CRC + #include + #endif + #endif + +#else + +// no hardware CRC + +// #define USE_CRC_EMU + +#ifdef USE_CRC_EMU + +#pragma message("ARM64 CRC emulation") + +MY_FORCE_INLINE +UInt32 __crc32b(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32w(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32d(UInt32 v, UInt64 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +#endif // USE_CRC_EMU + +#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE) + + + +#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#define T0_32_UNROLL_BYTES (4 * 4) +#define T0_64_UNROLL_BYTES (4 * 8) + +#ifndef ATTRIB_CRC +#define ATTRIB_CRC +#endif +// #pragma message("USE ARM HW CRC") + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_32_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_32_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_64_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_64_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#endif // MY_CPU_LE + + + + void MY_FAST_CALL CrcGenerateTable() { UInt32 i; @@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable() } } #endif + #endif + #ifdef MY_CPU_LE + #ifdef USE_ARM64_CRC + if (CPU_IsSupported_CRC32()) + { + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = + #if defined(MY_CPU_ARM) + CrcUpdateT0_32; + #else + CrcUpdateT0_64; + #endif + } + #endif + + #ifdef USE_CRC_EMU + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = CrcUpdateT0_64; + #endif #endif } diff --git a/deps/LZMA-SDK/C/7zCrcOpt.c b/deps/LZMA-SDK/C/7zCrcOpt.c index 2ee0de845..efaa7ab9d 100644 --- a/deps/LZMA-SDK/C/7zCrcOpt.c +++ b/deps/LZMA-SDK/C/7zCrcOpt.c @@ -1,5 +1,5 @@ /* 7zCrcOpt.c -- CRC32 calculation -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,6 +9,7 @@ #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U v = CRC_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x300)[((v ) & 0xFF)] ^ (table + 0x200)[((v >> 8) & 0xFF)] @@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U return v; } +UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x700)[((v ) & 0xFF)] ^ (table + 0x600)[((v >> 8) & 0xFF)] ^ (table + 0x500)[((v >> 16) & 0xFF)] ^ (table + 0x400)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co v = CRC_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x000)[((v ) & 0xFF)] ^ (table + 0x100)[((v >> 8) & 0xFF)] @@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x400)[((v ) & 0xFF)] ^ (table + 0x500)[((v >> 8) & 0xFF)] ^ (table + 0x600)[((v >> 16) & 0xFF)] ^ (table + 0x700)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] diff --git a/deps/LZMA-SDK/C/7zDec.c b/deps/LZMA-SDK/C/7zDec.c index 2a7b09030..83e37d166 100644 --- a/deps/LZMA-SDK/C/7zDec.c +++ b/deps/LZMA-SDK/C/7zDec.c @@ -1,5 +1,5 @@ /* 7zDec.c -- Decoding from 7z folder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -21,17 +21,20 @@ #endif #define k_Copy 0 -#define k_Delta 3 +#ifndef _7Z_NO_METHOD_LZMA2 #define k_LZMA2 0x21 +#endif #define k_LZMA 0x30101 -#define k_BCJ 0x3030103 #define k_BCJ2 0x303011B +#ifndef _7Z_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_BCJ 0x3030103 #define k_PPC 0x3030205 #define k_IA64 0x3030401 #define k_ARM 0x3030501 #define k_ARMT 0x3030701 #define k_SPARC 0x3030805 - +#endif #ifdef _7ZIP_PPMD_SUPPPORT @@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp) return *p->cur++; if (p->res == SZ_OK) { - size_t size = p->cur - p->begin; + size_t size = (size_t)(p->cur - p->begin); p->processed += size; p->res = ILookInStream_Skip(p->inStream, size); size = (1 << 25); @@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c Ppmd7_Init(&ppmd, order); } { - CPpmd7z_RangeDec rc; - Ppmd7z_RangeDec_CreateVTable(&rc); - rc.Stream = &s.vt; - if (!Ppmd7z_RangeDec_Init(&rc)) + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) res = SZ_ERROR_DATA; - else if (s.extra) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else + else if (!s.extra) { - SizeT i; - for (i = 0; i < outSize; i++) + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) { - int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt); + int sym = Ppmd7z_DecodeSymbol(&ppmd); if (s.extra || sym < 0) break; - outBuffer[i] = (Byte)sym; + *buf = (Byte)sym; } - if (i != outSize) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) + if (buf != lim) + res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ res = SZ_ERROR_DATA; + } } + if (s.extra) + res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; } Ppmd7_Free(&ppmd, allocMain); return res; @@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f) return SZ_ERROR_UNSUPPORTED; } +#ifndef _7Z_NO_METHODS_FILTERS #define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break; +#endif static SRes SzFolder_Decode2(const CSzFolder *folder, const Byte *propsData, diff --git a/deps/LZMA-SDK/C/7zFile.c b/deps/LZMA-SDK/C/7zFile.c index e486901e3..900125d52 100644 --- a/deps/LZMA-SDK/C/7zFile.c +++ b/deps/LZMA-SDK/C/7zFile.c @@ -1,5 +1,5 @@ /* 7zFile.c -- File IO -2017-04-03 : Igor Pavlov : Public domain */ +2021-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,9 +7,19 @@ #ifndef USE_WINDOWS_FILE -#ifndef UNDER_CE -#include -#endif + #include + + #ifndef USE_FOPEN + #include + #include + #ifdef _WIN32 + #include + typedef int ssize_t; + typedef int off_t; + #else + #include + #endif + #endif #else @@ -23,30 +33,36 @@ And message can be "Network connection was lost" */ -#define kChunkSizeMax (1 << 22) - #endif +#define kChunkSizeMax (1 << 22) + void File_Construct(CSzFile *p) { #ifdef USE_WINDOWS_FILE p->handle = INVALID_HANDLE_VALUE; - #else + #elif defined(USE_FOPEN) p->file = NULL; + #else + p->fd = -1; #endif } #if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) + static WRes File_Open(CSzFile *p, const char *name, int writeMode) { #ifdef USE_WINDOWS_FILE + p->handle = CreateFileA(name, writeMode ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ, NULL, writeMode ? CREATE_ALWAYS : OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); - #else + + #elif defined(USE_FOPEN) + p->file = fopen(name, writeMode ? "wb+" : "rb"); return (p->file != 0) ? 0 : #ifdef UNDER_CE @@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode) #else errno; #endif + + #else + + int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY); + #ifdef O_BINARY + flags |= O_BINARY; + #endif + p->fd = open(name, flags, 0666); + return (p->fd != -1) ? 0 : errno; + #endif } WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } -WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); } + +WRes OutFile_Open(CSzFile *p, const char *name) +{ + #if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN) + return File_Open(p, name, 1); + #else + p->fd = creat(name, 0666); + return (p->fd != -1) ? 0 : errno; + #endif +} + #endif + #ifdef USE_WINDOWS_FILE static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode) { @@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1 WRes File_Close(CSzFile *p) { #ifdef USE_WINDOWS_FILE + if (p->handle != INVALID_HANDLE_VALUE) { if (!CloseHandle(p->handle)) return GetLastError(); p->handle = INVALID_HANDLE_VALUE; } - #else + + #elif defined(USE_FOPEN) + if (p->file != NULL) { int res = fclose(p->file); if (res != 0) + { + if (res == EOF) + return errno; return res; + } p->file = NULL; } + + #else + + if (p->fd != -1) + { + if (close(p->fd) != 0) + return errno; + p->fd = -1; + } + #endif + return 0; } + WRes File_Read(CSzFile *p, void *data, size_t *size) { size_t originalSize = *size; + *size = 0; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE - *size = 0; do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; - BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); + const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); data = (void *)((Byte *)data + processed); originalSize -= processed; *size += processed; if (!res) return GetLastError(); + // debug : we can break here for partial reading mode + if (processed == 0) + break; + } + while (originalSize > 0); + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fread(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + // debug : we can break here for partial reading mode if (processed == 0) break; } while (originalSize > 0); - return 0; #else - - *size = fread(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const ssize_t processed = read(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + // debug : we can break here for partial reading mode + // break; + } + while (originalSize > 0); + #endif + + return 0; } + WRes File_Write(CSzFile *p, const void *data, size_t *size) { size_t originalSize = *size; + *size = 0; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE - *size = 0; do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; - BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); - data = (void *)((Byte *)data + processed); + const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); + data = (const void *)((const Byte *)data + processed); originalSize -= processed; *size += processed; if (!res) @@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size) break; } while (originalSize > 0); - return 0; + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fwrite(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + if (processed == 0) + break; + } + while (originalSize > 0); #else - *size = fwrite(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const ssize_t processed = write(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + } + while (originalSize > 0); + #endif + + return 0; } + WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) { #ifdef USE_WINDOWS_FILE - LARGE_INTEGER value; DWORD moveMethod; - value.LowPart = (DWORD)*pos; - value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ + UInt32 low = (UInt32)*pos; + LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ switch (origin) { case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; @@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) case SZ_SEEK_END: moveMethod = FILE_END; break; default: return ERROR_INVALID_PARAMETER; } - value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); - if (value.LowPart == 0xFFFFFFFF) + low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod); + if (low == (UInt32)0xFFFFFFFF) { WRes res = GetLastError(); if (res != NO_ERROR) return res; } - *pos = ((Int64)value.HighPart << 32) | value.LowPart; + *pos = ((Int64)high << 32) | low; return 0; #else - int moveMethod; - int res; + int moveMethod; // = origin; + switch (origin) { case SZ_SEEK_SET: moveMethod = SEEK_SET; break; case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; case SZ_SEEK_END: moveMethod = SEEK_END; break; - default: return 1; + default: return EINVAL; } - res = fseek(p->file, (long)*pos, moveMethod); - *pos = ftell(p->file); - return res; - #endif + #if defined(USE_FOPEN) + { + int res = fseek(p->file, (long)*pos, moveMethod); + if (res == -1) + return errno; + *pos = ftell(p->file); + if (*pos == -1) + return errno; + return 0; + } + #else + { + off_t res = lseek(p->fd, (off_t)*pos, moveMethod); + if (res == -1) + return errno; + *pos = res; + return 0; + } + + #endif // USE_FOPEN + #endif // USE_WINDOWS_FILE } + WRes File_GetLength(CSzFile *p, UInt64 *length) { #ifdef USE_WINDOWS_FILE @@ -224,13 +355,31 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) *length = (((UInt64)sizeHigh) << 32) + sizeLow; return 0; - #else + #elif defined(USE_FOPEN) long pos = ftell(p->file); int res = fseek(p->file, 0, SEEK_END); *length = ftell(p->file); fseek(p->file, pos, SEEK_SET); return res; + + #else + + off_t pos; + *length = 0; + pos = lseek(p->fd, 0, SEEK_CUR); + if (pos != -1) + { + const off_t len2 = lseek(p->fd, 0, SEEK_END); + const off_t res2 = lseek(p->fd, pos, SEEK_SET); + if (len2 != -1) + { + *length = (UInt64)len2; + if (res2 != -1) + return 0; + } + } + return errno; #endif } @@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size) { CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt); - return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; + WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } void FileSeqInStream_CreateVTable(CFileSeqInStream *p) @@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p) static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size) { CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; + WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin) { CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - return File_Seek(&p->file, pos, origin); + WRes wres = File_Seek(&p->file, pos, origin); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } void FileInStream_CreateVTable(CFileInStream *p) @@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p) static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) { CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt); - File_Write(&p->file, data, &size); + WRes wres = File_Write(&p->file, data, &size); + p->wres = wres; return size; } diff --git a/deps/LZMA-SDK/C/7zFile.h b/deps/LZMA-SDK/C/7zFile.h index 7e263bea1..c7a30fc2b 100644 --- a/deps/LZMA-SDK/C/7zFile.h +++ b/deps/LZMA-SDK/C/7zFile.h @@ -1,17 +1,20 @@ /* 7zFile.h -- File IO -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-15 : Igor Pavlov : Public domain */ #ifndef __7Z_FILE_H #define __7Z_FILE_H #ifdef _WIN32 #define USE_WINDOWS_FILE +// #include #endif #ifdef USE_WINDOWS_FILE #include #else -#include +// note: USE_FOPEN mode is limited to 32-bit file size +// #define USE_FOPEN +// #include #endif #include "7zTypes.h" @@ -24,8 +27,10 @@ typedef struct { #ifdef USE_WINDOWS_FILE HANDLE handle; - #else + #elif defined(USE_FOPEN) FILE *file; + #else + int fd; #endif } CSzFile; @@ -56,6 +61,7 @@ typedef struct { ISeqInStream vt; CSzFile file; + WRes wres; } CFileSeqInStream; void FileSeqInStream_CreateVTable(CFileSeqInStream *p); @@ -65,6 +71,7 @@ typedef struct { ISeekInStream vt; CSzFile file; + WRes wres; } CFileInStream; void FileInStream_CreateVTable(CFileInStream *p); @@ -74,6 +81,7 @@ typedef struct { ISeqOutStream vt; CSzFile file; + WRes wres; } CFileOutStream; void FileOutStream_CreateVTable(CFileOutStream *p); diff --git a/deps/LZMA-SDK/C/7zStream.c b/deps/LZMA-SDK/C/7zStream.c index 579741fad..4b472a41d 100644 --- a/deps/LZMA-SDK/C/7zStream.c +++ b/deps/LZMA-SDK/C/7zStream.c @@ -1,5 +1,5 @@ /* 7zStream.c -- 7z Stream functions -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) { - Int64 t = offset; + Int64 t = (Int64)offset; return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); } diff --git a/deps/LZMA-SDK/C/7zTypes.h b/deps/LZMA-SDK/C/7zTypes.h index 593f5aa25..497b14506 100644 --- a/deps/LZMA-SDK/C/7zTypes.h +++ b/deps/LZMA-SDK/C/7zTypes.h @@ -1,11 +1,13 @@ /* 7zTypes.h -- Basic types -2018-08-04 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H #ifdef _WIN32 /* #include */ +#else +#include #endif #include @@ -43,18 +45,112 @@ EXTERN_C_BEGIN typedef int SRes; +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + #ifdef _WIN32 /* typedef DWORD WRes; */ typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -#else +#else // _WIN32 +// #define ENV_HAVE_LSTAT typedef int WRes; -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_WIN32 -#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY__FACILITY_ERRNO 0x800 +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits +typedef long INT_PTR; +typedef unsigned long UINT_PTR; + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ #endif @@ -63,6 +159,10 @@ typedef int WRes; #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #endif +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; @@ -75,6 +175,38 @@ typedef int Int32; typedef unsigned int UInt32; #endif + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + + #ifdef _SZ_NO_INT_64 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. @@ -128,25 +260,37 @@ typedef int BoolInt; #define MY_CDECL __cdecl #define MY_FAST_CALL __fastcall -#else +#else // _MSC_VER +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define MY_NO_INLINE __attribute__((noinline)) +// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#else #define MY_NO_INLINE +#endif + #define MY_FORCE_INLINE -#define MY_CDECL -#define MY_FAST_CALL -/* inline keyword : for C++ / C99 */ -/* GCC, clang: */ -/* -#if defined (__GNUC__) && (__GNUC__ >= 4) -#define MY_FORCE_INLINE __attribute__((always_inline)) -#define MY_NO_INLINE __attribute__((noinline)) -#endif -*/ +#define MY_CDECL +#if defined(_M_IX86) \ + || defined(__i386__) +// #define MY_FAST_CALL __attribute__((fastcall)) +// #define MY_FAST_CALL __attribute__((cdecl)) +#define MY_FAST_CALL +#elif defined(MY_CPU_AMD64) +// #define MY_FAST_CALL __attribute__((ms_abi)) +#define MY_FAST_CALL +#else +#define MY_FAST_CALL #endif +#endif // _MSC_VER + /* The following interfaces use first parameter as pointer to structure */ @@ -335,12 +479,11 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) - +#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #endif -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) /* #define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) @@ -353,6 +496,7 @@ struct ISzAlloc */ +#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) #ifdef _WIN32 diff --git a/deps/LZMA-SDK/C/7zVersion.h b/deps/LZMA-SDK/C/7zVersion.h index 0074c64be..0fe636abc 100644 --- a/deps/LZMA-SDK/C/7zVersion.h +++ b/deps/LZMA-SDK/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 19 -#define MY_VER_MINOR 00 +#define MY_VER_MAJOR 21 +#define MY_VER_MINOR 02 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "19.00" +#define MY_VERSION_NUMBERS "21.02 alpha" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2019-02-21" +#define MY_DATE "2021-05-06" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/deps/LZMA-SDK/C/7zip_gcc_c.mak b/deps/LZMA-SDK/C/7zip_gcc_c.mak new file mode 100644 index 000000000..00ecfb043 --- /dev/null +++ b/deps/LZMA-SDK/C/7zip_gcc_c.mak @@ -0,0 +1,301 @@ + +MY_ARCH_2 = $(MY_ARCH) + +MY_ASM = jwasm +MY_ASM = asmc + +PROGPATH = $(O)/$(PROG) + + +# for object file +CFLAGS_BASE_LIST = -c +# for ASM file +# CFLAGS_BASE_LIST = -S +CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \ + -DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE + + +LDFLAGS_STATIC = -DNDEBUG +# -static + +ifdef SystemDrive +IS_MINGW = 1 +endif + +ifdef DEF_FILE + + +ifdef IS_MINGW +SHARED_EXT=.dll +LDFLAGS = -shared -DEF $(DEF_FILE) $(LDFLAGS_STATIC) +else +SHARED_EXT=.so +LDFLAGS = -shared -fPIC $(LDFLAGS_STATIC) +CC_SHARED=-fPIC +endif + + +else + +LDFLAGS = $(LDFLAGS_STATIC) +# -s is not required for clang, do we need it for GGC ??? +# -s + +#-static -static-libgcc -static-libstdc++ + +ifdef IS_MINGW +SHARED_EXT=.exe +else +SHARED_EXT= +endif + +endif + + +PROGPATH = $(O)/$(PROG)$(SHARED_EXT) + + +ifndef O +O=_o +endif + +ifdef IS_MINGW + +RM = del +MY_MKDIR=mkdir +LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32 + + +CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE +# -Wno-delete-non-virtual-dtor + +DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll + +else + +RM = rm -f +MY_MKDIR=mkdir -p +# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST +# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE + +# LOCAL_LIBS=-lpthread +# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl +LIB2 = -lpthread -ldl + +DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(OBJS) + +endif + + + +CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@ + + +ifdef IS_X64 +AFLAGS_ABI = -elf64 -DABI_LINUX +else +AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL +# -DABI_CDECL +# -DABI_LINUX +# -DABI_CDECL +endif +AFLAGS = $(AFLAGS_ABI) -Fo$(O)/ + + +CXX_WARN_FLAGS = +#-Wno-invalid-offsetof +#-Wno-reorder + +CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS) + +all: $(O) $(PROGPATH) + +$(O): + $(MY_MKDIR) $(O) + +$(PROGPATH): $(OBJS) + $(CXX) -s -o $(PROGPATH) $(MY_ARCH_2) $(LDFLAGS) $(OBJS) $(MY_LIBS) $(LIB2) + + + +ifndef NO_DEFAULT_RES +$O/resource.o: resource.rc + windres.exe $(RFLAGS) resource.rc $O/resource.o +endif + + + +$O/7zAlloc.o: ../../../C/7zAlloc.c + $(CC) $(CFLAGS) $< +$O/7zArcIn.o: ../../../C/7zArcIn.c + $(CC) $(CFLAGS) $< +$O/7zBuf.o: ../../../C/7zBuf.c + $(CC) $(CFLAGS) $< +$O/7zBuf2.o: ../../../C/7zBuf2.c + $(CC) $(CFLAGS) $< +$O/7zCrc.o: ../../../C/7zCrc.c + $(CC) $(CFLAGS) $< +$O/7zDec.o: ../../../C/7zDec.c + $(CC) $(CFLAGS) $< +$O/7zFile.o: ../../../C/7zFile.c + $(CC) $(CFLAGS) $< +$O/7zStream.o: ../../../C/7zStream.c + $(CC) $(CFLAGS) $< +$O/Aes.o: ../../../C/Aes.c + $(CC) $(CFLAGS) $< +$O/Alloc.o: ../../../C/Alloc.c + $(CC) $(CFLAGS) $< +$O/Bcj2.o: ../../../C/Bcj2.c + $(CC) $(CFLAGS) $< +$O/Bcj2Enc.o: ../../../C/Bcj2Enc.c + $(CC) $(CFLAGS) $< +$O/Blake2s.o: ../../../C/Blake2s.c + $(CC) $(CFLAGS) $< +$O/Bra.o: ../../../C/Bra.c + $(CC) $(CFLAGS) $< +$O/Bra86.o: ../../../C/Bra86.c + $(CC) $(CFLAGS) $< +$O/BraIA64.o: ../../../C/BraIA64.c + $(CC) $(CFLAGS) $< +$O/BwtSort.o: ../../../C/BwtSort.c + $(CC) $(CFLAGS) $< + +$O/CpuArch.o: ../../../C/CpuArch.c + $(CC) $(CFLAGS) $< +$O/Delta.o: ../../../C/Delta.c + $(CC) $(CFLAGS) $< +$O/DllSecur.o: ../../../C/DllSecur.c + $(CC) $(CFLAGS) $< +$O/HuffEnc.o: ../../../C/HuffEnc.c + $(CC) $(CFLAGS) $< +$O/LzFind.o: ../../../C/LzFind.c + $(CC) $(CFLAGS) $< + +# ifdef MT_FILES +$O/LzFindMt.o: ../../../C/LzFindMt.c + $(CC) $(CFLAGS) $< + +$O/Threads.o: ../../../C/Threads.c + $(CC) $(CFLAGS) $< +# endif + +$O/LzmaEnc.o: ../../../C/LzmaEnc.c + $(CC) $(CFLAGS) $< +$O/Lzma86Dec.o: ../../../C/Lzma86Dec.c + $(CC) $(CFLAGS) $< +$O/Lzma86Enc.o: ../../../C/Lzma86Enc.c + $(CC) $(CFLAGS) $< +$O/Lzma2Dec.o: ../../../C/Lzma2Dec.c + $(CC) $(CFLAGS) $< +$O/Lzma2DecMt.o: ../../../C/Lzma2DecMt.c + $(CC) $(CFLAGS) $< +$O/Lzma2Enc.o: ../../../C/Lzma2Enc.c + $(CC) $(CFLAGS) $< +$O/LzmaLib.o: ../../../C/LzmaLib.c + $(CC) $(CFLAGS) $< +$O/MtCoder.o: ../../../C/MtCoder.c + $(CC) $(CFLAGS) $< +$O/MtDec.o: ../../../C/MtDec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7.o: ../../../C/Ppmd7.c + $(CC) $(CFLAGS) $< +$O/Ppmd7aDec.o: ../../../C/Ppmd7aDec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7Dec.o: ../../../C/Ppmd7Dec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7Enc.o: ../../../C/Ppmd7Enc.c + $(CC) $(CFLAGS) $< +$O/Ppmd8.o: ../../../C/Ppmd8.c + $(CC) $(CFLAGS) $< +$O/Ppmd8Dec.o: ../../../C/Ppmd8Dec.c + $(CC) $(CFLAGS) $< +$O/Ppmd8Enc.o: ../../../C/Ppmd8Enc.c + $(CC) $(CFLAGS) $< +$O/Sha1.o: ../../../C/Sha1.c + $(CC) $(CFLAGS) $< +$O/Sha256.o: ../../../C/Sha256.c + $(CC) $(CFLAGS) $< +$O/Sort.o: ../../../C/Sort.c + $(CC) $(CFLAGS) $< +$O/Xz.o: ../../../C/Xz.c + $(CC) $(CFLAGS) $< +$O/XzCrc64.o: ../../../C/XzCrc64.c + $(CC) $(CFLAGS) $< + + +ifdef USE_ASM +ifdef IS_X64 +USE_X86_ASM=1 +else +ifdef IS_X86 +USE_X86_ASM=1 +endif +endif +endif + +ifdef USE_X86_ASM +$O/7zCrcOpt.o: ../../../Asm/x86/7zCrcOpt.asm + $(MY_ASM) $(AFLAGS) $< +$O/XzCrc64Opt.o: ../../../Asm/x86/XzCrc64Opt.asm + $(MY_ASM) $(AFLAGS) $< +$O/AesOpt.o: ../../../Asm/x86/AesOpt.asm + $(MY_ASM) $(AFLAGS) $< +$O/Sha1Opt.o: ../../../Asm/x86/Sha1Opt.asm + $(MY_ASM) $(AFLAGS) $< +$O/Sha256Opt.o: ../../../Asm/x86/Sha256Opt.asm + $(MY_ASM) $(AFLAGS) $< +else +$O/7zCrcOpt.o: ../../7zCrcOpt.c + $(CC) $(CFLAGS) $< +$O/XzCrc64Opt.o: ../../XzCrc64Opt.c + $(CC) $(CFLAGS) $< +$O/Sha1Opt.o: ../../Sha1Opt.c + $(CC) $(CFLAGS) $< +$O/Sha256Opt.o: ../../Sha256Opt.c + $(CC) $(CFLAGS) $< +$O/AesOpt.o: ../../AesOpt.c + $(CC) $(CFLAGS) $< +endif + + +ifdef USE_LZMA_DEC_ASM + +ifdef IS_X64 +$O/LzmaDecOpt.o: ../../../Asm/x86/LzmaDecOpt.asm + $(MY_ASM) $(AFLAGS) $< +endif + +ifdef IS_ARM64 +$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S + $(CC) $(CFLAGS) $< +endif + +$O/LzmaDec.o: ../../LzmaDec.c + $(CC) $(CFLAGS) -D_LZMA_DEC_OPT $< + +else + +$O/LzmaDec.o: ../../LzmaDec.c + $(CC) $(CFLAGS) $< + +endif + + + +$O/XzDec.o: ../../../C/XzDec.c + $(CC) $(CFLAGS) $< +$O/XzEnc.o: ../../../C/XzEnc.c + $(CC) $(CFLAGS) $< +$O/XzIn.o: ../../../C/XzIn.c + $(CC) $(CFLAGS) $< + + +$O/7zMain.o: ../../../C/Util/7z/7zMain.c + $(CC) $(CFLAGS) $< +$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c + $(CC) $(CFLAGS) $< + + + +clean: + -$(DEL_OBJ_EXE) diff --git a/deps/LZMA-SDK/C/Aes.c b/deps/LZMA-SDK/C/Aes.c index 8f7d50ea2..0f0ddc87a 100644 --- a/deps/LZMA-SDK/C/Aes.c +++ b/deps/LZMA-SDK/C/Aes.c @@ -1,10 +1,17 @@ /* Aes.c -- AES encryption / decryption -2017-01-24 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "Aes.h" #include "CpuArch.h" +#include "Aes.h" + +AES_CODE_FUNC g_AesCbc_Decode; +#ifndef _SFX +AES_CODE_FUNC g_AesCbc_Encode; +AES_CODE_FUNC g_AesCtr_Code; +UInt32 g_Aes_SupportedFunctions_Flags; +#endif static UInt32 T[256 * 4]; static const Byte Sbox[256] = { @@ -25,23 +32,10 @@ static const Byte Sbox[256] = { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; -void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); - -void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); - -AES_CODE_FUNC g_AesCbc_Encode; -AES_CODE_FUNC g_AesCbc_Decode; -AES_CODE_FUNC g_AesCtr_Code; static UInt32 D[256 * 4]; static Byte InvS[256]; -static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; - #define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF) #define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24)) @@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0 #define DD(x) (D + (x << 8)) +// #define _SHOW_AES_STATUS + +#ifdef MY_CPU_X86_OR_AMD64 + #define USE_HW_AES +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_AES + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_AES + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_AES + #endif + #endif +#endif + +#ifdef USE_HW_AES +#ifdef _SHOW_AES_STATUS +#include +#define _PRF(x) x +#else +#define _PRF(x) +#endif +#endif + + void AesGenTables(void) { unsigned i; @@ -90,18 +114,48 @@ void AesGenTables(void) } } - g_AesCbc_Encode = AesCbc_Encode; - g_AesCbc_Decode = AesCbc_Decode; - g_AesCtr_Code = AesCtr_Code; + { + AES_CODE_FUNC d = AesCbc_Decode; + #ifndef _SFX + AES_CODE_FUNC e = AesCbc_Encode; + AES_CODE_FUNC c = AesCtr_Code; + UInt32 flags = 0; + #endif - #ifdef MY_CPU_X86_OR_AMD64 - if (CPU_Is_Aes_Supported()) + #ifdef USE_HW_AES + if (CPU_IsSupported_AES()) { - g_AesCbc_Encode = AesCbc_Encode_Intel; - g_AesCbc_Decode = AesCbc_Decode_Intel; - g_AesCtr_Code = AesCtr_Code_Intel; + // #pragma message ("AES HW") + _PRF(printf("\n===AES HW\n")); + d = AesCbc_Decode_HW; + + #ifndef _SFX + e = AesCbc_Encode_HW; + c = AesCtr_Code_HW; + flags = k_Aes_SupportedFunctions_HW; + #endif + + #ifdef MY_CPU_X86_OR_AMD64 + if (CPU_IsSupported_VAES_AVX2()) + { + _PRF(printf("\n===vaes avx2\n")); + d = AesCbc_Decode_HW_256; + #ifndef _SFX + c = AesCtr_Code_HW_256; + flags |= k_Aes_SupportedFunctions_HW_256; + #endif + } + #endif } #endif + + g_AesCbc_Decode = d; + #ifndef _SFX + g_AesCbc_Encode = e; + g_AesCtr_Code = c; + g_Aes_SupportedFunctions_Flags = flags; + #endif + } } @@ -142,8 +196,11 @@ void AesGenTables(void) void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) { - unsigned i, wSize; - wSize = keySize + 28; + unsigned i, m; + const UInt32 *wLim; + UInt32 t; + UInt32 rcon = 1; + keySize /= 4; w[0] = ((UInt32)keySize / 2) + 3; w += 4; @@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) for (i = 0; i < keySize; i++, key += 4) w[i] = GetUi32(key); - for (; i < wSize; i++) + t = w[(size_t)keySize - 1]; + wLim = w + (size_t)keySize * 3 + 28; + m = 0; + do { - UInt32 t = w[(size_t)i - 1]; - unsigned rem = i % keySize; - if (rem == 0) - t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]); - else if (keySize > 6 && rem == 4) + if (m == 0) + { + t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]); + rcon <<= 1; + if (rcon & 0x100) + rcon = 0x1b; + m = keySize; + } + else if (m == 4 && keySize > 6) t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]); - w[i] = w[i - keySize] ^ t; + m--; + t ^= w[0]; + w[keySize] = t; } + while (++w != wLim); } void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) @@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) src and dest are pointers to 4 UInt32 words. src and dest can point to same block */ +// MY_FORCE_INLINE static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) FT4(0); FT4(1); FT4(2); FT4(3); } +MY_FORCE_INLINE static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -294,7 +363,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) UInt32 t = temp[i]; #ifdef MY_CPU_LE_UNALIGN - *((UInt32 *)data) ^= t; + *((UInt32 *)(void *)data) ^= t; #else data[0] ^= (t & 0xFF); data[1] ^= ((t >> 8) & 0xFF); diff --git a/deps/LZMA-SDK/C/Aes.h b/deps/LZMA-SDK/C/Aes.h index 381e979d1..602e25ea2 100644 --- a/deps/LZMA-SDK/C/Aes.h +++ b/deps/LZMA-SDK/C/Aes.h @@ -1,5 +1,5 @@ /* Aes.h -- AES encryption / decryption -2013-01-18 : Igor Pavlov : Public domain */ +2018-04-28 : Igor Pavlov : Public domain */ #ifndef __AES_H #define __AES_H @@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize) /* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */ void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */ + /* data - 16-byte aligned pointer to data */ /* numBlocks - the number of 16-byte blocks in data array */ typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); -extern AES_CODE_FUNC g_AesCbc_Encode; + extern AES_CODE_FUNC g_AesCbc_Decode; +#ifndef _SFX +extern AES_CODE_FUNC g_AesCbc_Encode; extern AES_CODE_FUNC g_AesCtr_Code; +#define k_Aes_SupportedFunctions_HW (1 << 2) +#define k_Aes_SupportedFunctions_HW_256 (1 << 3) +extern UInt32 g_Aes_SupportedFunctions_Flags; +#endif + + +#define DECLARE__AES_CODE_FUNC(funcName) \ + void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); + +DECLARE__AES_CODE_FUNC (AesCbc_Encode) +DECLARE__AES_CODE_FUNC (AesCbc_Decode) +DECLARE__AES_CODE_FUNC (AesCtr_Code) + +DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW) +DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW) +DECLARE__AES_CODE_FUNC (AesCtr_Code_HW) + +DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256) +DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256) EXTERN_C_END diff --git a/deps/LZMA-SDK/C/AesOpt.c b/deps/LZMA-SDK/C/AesOpt.c index 0e7f49a1b..1bdc9a882 100644 --- a/deps/LZMA-SDK/C/AesOpt.c +++ b/deps/LZMA-SDK/C/AesOpt.c @@ -1,184 +1,776 @@ -/* AesOpt.c -- Intel's AES -2017-06-08 : Igor Pavlov : Public domain */ +/* AesOpt.c -- AES optimized code for x86 AES hardware instructions +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 -#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) -#define USE_INTEL_AES + + #if defined(__clang__) + #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8) + #define USE_INTEL_AES + #define ATTRIB_AES __attribute__((__target__("aes"))) + #if (__clang_major__ >= 8) + #define USE_INTEL_VAES + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) + #endif + #endif + #elif defined(__GNUC__) + #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) + #define USE_INTEL_AES + #ifndef __AES__ + #define ATTRIB_AES __attribute__((__target__("aes"))) + #endif + #if (__GNUC__ >= 8) + #define USE_INTEL_VAES + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) + #endif + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_INTEL_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_INTEL_VAES + #endif + #endif + #elif defined(_MSC_VER) + #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) + #define USE_INTEL_AES + #if (_MSC_VER >= 1910) + #define USE_INTEL_VAES + #endif + #endif + #endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES #endif +#ifndef ATTRIB_VAES + #define ATTRIB_VAES #endif + #ifdef USE_INTEL_AES #include -void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +#ifndef USE_INTEL_VAES +#define AES_TYPE_keys __m128i +#define AES_TYPE_data __m128i +#endif + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src); + +#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src); +#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src); + + +AES_FUNC_START2 (AesCbc_Encode_HW) { __m128i m = *p; + const __m128i k0 = p[2]; + const __m128i k1 = p[3]; + const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; for (; numBlocks != 0; numBlocks--, data++) { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p + 3; - m = _mm_xor_si128(m, *data); - m = _mm_xor_si128(m, p[2]); + UInt32 r = numRounds2; + const __m128i *w = p + 4; + __m128i temp = *data; + MM_XOR (temp, k0); + MM_XOR (m, temp); + MM_OP_m (_mm_aesenc_si128, k1); do { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); w += 2; } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); + while (--r); + MM_OP_m (_mm_aesenclast_si128, w[0]); *data = m; } *p = m; } -#define NUM_WAYS 3 -#define AES_OP_W(op, n) { \ - const __m128i t = w[n]; \ - m0 = op(m0, t); \ - m1 = op(m1, t); \ - m2 = op(m2, t); \ - } +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1); +#define WOP_3(op) WOP_2 (op) op (m2, 2); +#define WOP_4(op) WOP_3 (op) op (m3, 3); +#ifdef MY_CPU_AMD64 +#define WOP_5(op) WOP_4 (op) op (m4, 4); +#define WOP_6(op) WOP_5 (op) op (m5, 5); +#define WOP_7(op) WOP_6 (op) op (m6, 6); +#define WOP_8(op) WOP_7 (op) op (m7, 7); +#endif +/* +#define WOP_9(op) WOP_8 (op) op (m8, 8); +#define WOP_10(op) WOP_9 (op) op (m9, 9); +#define WOP_11(op) WOP_10(op) op (m10, 10); +#define WOP_12(op) WOP_11(op) op (m11, 11); +#define WOP_13(op) WOP_12(op) op (m12, 12); +#define WOP_14(op) WOP_13(op) op (m13, 13); +*/ + +#ifdef MY_CPU_AMD64 + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 +#else + #define NUM_WAYS 4 + #define WOP_M1 WOP_4 +#endif + +#define WOP(op) op (m0, 0); WOP_M1(op) + + +#define DECLARE_VAR(reg, ii) __m128i reg +#define LOAD_data( reg, ii) reg = data[ii]; +#define STORE_data( reg, ii) data[ii] = reg; +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#endif + +#define AVX__DECLARE_VAR(reg, ii) __m256i reg +#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; +#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; +#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])); + +#define MM_OP_key(op, reg) MM_OP(op, reg, key); + +#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg) +#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg) +#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg) +#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg) +#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) + -#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) -#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) -#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) -#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) +#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) +#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) +#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) +#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) +#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) -void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr; +#define CTR_END( reg, ii) MM_XOR (data[ii], reg); + +#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key); +#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg); + +#define WOP_KEY(op, n) { \ + const __m128i key = w[n]; \ + WOP(op); } + +#define AVX__WOP_KEY(op, n) { \ + const __m256i key = w[n]; \ + WOP(op); } + + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + + +#define WIDE_LOOP_END \ + data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + +#define NUM_AES_KEYS_MAX 15 + +#define WIDE_LOOP_START_AVX(OP) \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS * 2) \ + { __m256i keys[NUM_AES_KEYS_MAX]; \ + UInt32 ii; \ + OP \ + for (ii = 0; ii < numRounds; ii++) \ + keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \ + dataEnd -= NUM_WAYS * 2; do { \ + + +#define WIDE_LOOP_END_AVX(OP) \ + data += NUM_WAYS * 2; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS * 2; \ + OP \ + _mm256_zeroupper(); \ + } \ + +/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified, + MSVC still can insert vzeroupper instruction. */ + + +AES_FUNC_START2 (AesCbc_Decode_HW) { __m128i iv = *p; - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1; + const __m128i *dataEnd; + p += 2; + + WIDE_LOOP_START { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m0, m1, m2; + const __m128i *w = wStart; + + WOP (DECLARE_VAR) + WOP (LOAD_data); + WOP_KEY (AES_XOR, 1) + + do { - const __m128i t = w[2]; - m0 = _mm_xor_si128(t, data[0]); - m1 = _mm_xor_si128(t, data[1]); - m2 = _mm_xor_si128(t, data[2]); + WOP_KEY (AES_DEC, 0) + w--; } - numRounds2--; + while (w != p); + WOP_KEY (AES_DEC_LAST, 0) + + MM_XOR (m0, iv); + WOP_M1 (XOR_data_M1) + iv = data[NUM_WAYS - 1]; + WOP (STORE_data); + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const __m128i *w = wStart - 1; + __m128i m = _mm_xor_si128 (w[2], *data); do { - AES_DEC(1) - AES_DEC(0) + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdec_si128, w[0]); w -= 2; } - while (--numRounds2 != 0); - AES_DEC(1) - AES_DEC_LAST(0) + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdeclast_si128, w[0]); + MM_XOR (m, iv); + iv = *data; + *data = m; + } + + p[-2] = iv; +} + + +AES_FUNC_START2 (AesCtr_Code_HW) +{ + __m128i ctr = *p; + UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1; + const __m128i *dataEnd; + __m128i one = _mm_cvtsi32_si128(1); + + p += 2; + + WIDE_LOOP_START + { + const __m128i *w = p; + UInt32 r = numRoundsMinus2; + WOP (DECLARE_VAR) + WOP (CTR_START); + WOP_KEY (AES_XOR, 0) + w += 1; + do { - __m128i t; - t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; - t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; - t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; + WOP_KEY (AES_ENC, 0) + w += 1; } + while (--r); + WOP_KEY (AES_ENC_LAST, 0) + + WOP (CTR_END); } - for (; numBlocks != 0; numBlocks--, data++) + WIDE_LOOP_END + + SINGLE_LOOP + { + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; + const __m128i *w = p; + __m128i m; + MM_OP (_mm_add_epi64, ctr, one); + m = _mm_xor_si128 (ctr, p[0]); + w += 1; + do + { + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); + w += 2; + } + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenclast_si128, w[1]); + MM_XOR (*data, m); + } + + p[-2] = ctr; +} + + + +#ifdef USE_INTEL_VAES + +#if defined(__clang__) && defined(_MSC_VER) +#define __SSE4_2__ +#define __AES__ +#define __AVX__ +#define __AVX2__ +#define __VAES__ +#define __AVX512F__ +#define __AVX512VL__ +#endif + +#include + +#define VAES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_VAES \ +AES_FUNC_START (name) + +VAES_FUNC_START2 (AesCbc_Decode_HW_256) +{ + __m128i iv = *p; + const __m128i *dataEnd; + UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + p += 2; + + WIDE_LOOP_START_AVX(;) { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m = _mm_xor_si128(w[2], *data); - numRounds2--; + const __m256i *w = keys + numRounds - 2; + + WOP (AVX__DECLARE_VAR) + WOP (AVX__LOAD_data); + AVX__WOP_KEY (AVX__AES_XOR, 1) + do { - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdec_si128(m, w[0]); + AVX__WOP_KEY (AVX__AES_DEC, 0) + w--; + } + while (w != keys); + AVX__WOP_KEY (AVX__AES_DEC_LAST, 0) + + AVX_XOR (m0, _mm256_setr_m128i(iv, data[0])); + WOP_M1 (AVX__XOR_data_M1) + iv = data[NUM_WAYS * 2 - 1]; + WOP (AVX__STORE_data); + } + WIDE_LOOP_END_AVX(;) + + SINGLE_LOOP + { + const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3; + __m128i m = _mm_xor_si128 (w[2], *data); + do + { + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdec_si128, w[0]); w -= 2; } - while (--numRounds2 != 0); - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdeclast_si128(m, w[0]); + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdeclast_si128, w[0]); - m = _mm_xor_si128(m, iv); + MM_XOR (m, iv); iv = *data; *data = m; } - *p = iv; + + p[-2] = iv; } -void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) + +/* +SSE2: _mm_cvtsi32_si128 : movd +AVX: _mm256_setr_m128i : vinsertf128 +AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm + _mm256_extracti128_si256 : vextracti128 + _mm256_broadcastsi128_si256 : vbroadcasti128 +*/ + +#define AVX__CTR_LOOP_START \ + ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \ + two = _mm256_setr_m128i(one, one); \ + two = _mm256_add_epi64(two, two); \ + +// two = _mm256_setr_epi64x(2, 0, 2, 0); + +#define AVX__CTR_LOOP_ENC \ + ctr = _mm256_extracti128_si256 (ctr2, 1); \ + +VAES_FUNC_START2 (AesCtr_Code_HW_256) { __m128i ctr = *p; - __m128i one; - one.m128i_u64[0] = 1; - one.m128i_u64[1] = 0; - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + const __m128i *dataEnd; + __m128i one = _mm_cvtsi32_si128(1); + __m256i ctr2, two; + p += 2; + + WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START) { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p; - __m128i m0, m1, m2; - { - const __m128i t = w[2]; - ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); - } - w += 3; + const __m256i *w = keys; + UInt32 r = numRounds - 2; + WOP (AVX__DECLARE_VAR) + AVX__WOP_KEY (AVX__CTR_START, 0); + + w += 1; do { - AES_ENC(0) - AES_ENC(1) - w += 2; + AVX__WOP_KEY (AVX__AES_ENC, 0) + w += 1; } - while (--numRounds2 != 0); - AES_ENC(0) - AES_ENC_LAST(1) - data[0] = _mm_xor_si128(data[0], m0); - data[1] = _mm_xor_si128(data[1], m1); - data[2] = _mm_xor_si128(data[2], m2); + while (--r); + AVX__WOP_KEY (AVX__AES_ENC_LAST, 0) + + WOP (AVX__CTR_END); } - for (; numBlocks != 0; numBlocks--, data++) + WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC) + + SINGLE_LOOP { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; const __m128i *w = p; __m128i m; - ctr = _mm_add_epi64(ctr, one); - m = _mm_xor_si128(ctr, p[2]); - w += 3; + MM_OP (_mm_add_epi64, ctr, one); + m = _mm_xor_si128 (ctr, p[0]); + w += 1; do { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); w += 2; } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); - *data = _mm_xor_si128(*data, m); + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenclast_si128, w[1]); + MM_XOR (*data, m); } - *p = ctr; + + p[-2] = ctr; } +#endif // USE_INTEL_VAES + +#else // USE_INTEL_AES + +/* no USE_INTEL_AES */ + +#pragma message("AES HW_SW stub was used") + +#define AES_TYPE_keys UInt32 +#define AES_TYPE_data Byte + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \ + +#define AES_COMPAT_STUB(name) \ + AES_FUNC_START(name); \ + AES_FUNC_START(name ## _HW) \ + { name(p, data, numBlocks); } + +AES_COMPAT_STUB (AesCbc_Encode) +AES_COMPAT_STUB (AesCbc_Decode) +AES_COMPAT_STUB (AesCtr_Code) + +#endif // USE_INTEL_AES + + +#ifndef USE_INTEL_VAES + +#pragma message("VAES HW_SW stub was used") + +#define VAES_COMPAT_STUB(name) \ + void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ + void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ + { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); } + +VAES_COMPAT_STUB (AesCbc_Decode_HW) +VAES_COMPAT_STUB (AesCtr_Code_HW) + +#endif // ! USE_INTEL_VAES + + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_AES + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_AES + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_AES + #endif + #endif + +#ifdef USE_HW_AES + +// #pragma message("=== AES HW === ") + +#if defined(__clang__) || defined(__GNUC__) + #ifdef MY_CPU_ARM64 + #define ATTRIB_AES __attribute__((__target__("+crypto"))) + #else + #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES +#endif + +#if defined(_MSC_VER) && defined(MY_CPU_ARM64) +#include #else +#include +#endif + +typedef uint8x16_t v128; + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src); +#define MM_OP1_m(op) m = op(m); -void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); +#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src); +#define MM_XOR_m( src) MM_XOR(m, src); -void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) +#define AES_E_m(k) MM_OP_m (vaeseq_u8, k); +#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8); + + +AES_FUNC_START2 (AesCbc_Encode_HW) { - AesCbc_Encode(p, data, numBlocks); + v128 m = *p; + const v128 k0 = p[2]; + const v128 k1 = p[3]; + const v128 k2 = p[4]; + const v128 k3 = p[5]; + const v128 k4 = p[6]; + const v128 k5 = p[7]; + const v128 k6 = p[8]; + const v128 k7 = p[9]; + const v128 k8 = p[10]; + const v128 k9 = p[11]; + const UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const v128 *w = p + ((size_t)numRounds2 * 2); + const v128 k_z1 = w[1]; + const v128 k_z0 = w[2]; + for (; numBlocks != 0; numBlocks--, data++) + { + MM_XOR_m (*data); + AES_E_MC_m (k0) + AES_E_MC_m (k1) + AES_E_MC_m (k2) + AES_E_MC_m (k3) + AES_E_MC_m (k4) + AES_E_MC_m (k5) + AES_E_MC_m (k6) + AES_E_MC_m (k7) + AES_E_MC_m (k8) + if (numRounds2 >= 6) + { + AES_E_MC_m (k9) + AES_E_MC_m (p[12]) + if (numRounds2 != 6) + { + AES_E_MC_m (p[13]) + AES_E_MC_m (p[14]) + } + } + AES_E_m (k_z1); + MM_XOR_m (k_z0); + *data = m; + } + *p = m; } -void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) + +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1); +#define WOP_3(op) WOP_2 (op) op (m2, 2); +#define WOP_4(op) WOP_3 (op) op (m3, 3); +#define WOP_5(op) WOP_4 (op) op (m4, 4); +#define WOP_6(op) WOP_5 (op) op (m5, 5); +#define WOP_7(op) WOP_6 (op) op (m6, 6); +#define WOP_8(op) WOP_7 (op) op (m7, 7); + + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 + +#define WOP(op) op (m0, 0); WOP_M1(op) + +#define DECLARE_VAR(reg, ii) v128 reg +#define LOAD_data( reg, ii) reg = data[ii]; +#define STORE_data( reg, ii) data[ii] = reg; +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#endif + +#define MM_OP_key(op, reg) MM_OP (op, reg, key); + +#define AES_D_m(k) MM_OP_m (vaesdq_u8, k); +#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8); + +#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg) +#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg) +#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg) + +#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg) +#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg) + +#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr); +#define CTR_END( reg, ii) MM_XOR (data[ii], reg); + +#define WOP_KEY(op, n) { \ + const v128 key = w[n]; \ + WOP(op); } + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + +#define WIDE_LOOP_END \ + data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + +AES_FUNC_START2 (AesCbc_Decode_HW) { - AesCbc_Decode(p, data, numBlocks); + v128 iv = *p; + const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; + const v128 *dataEnd; + p += 2; + + WIDE_LOOP_START + { + const v128 *w = wStart; + WOP (DECLARE_VAR) + WOP (LOAD_data); + WOP_KEY (AES_D_IMC, 2) + do + { + WOP_KEY (AES_D_IMC, 1) + WOP_KEY (AES_D_IMC, 0) + w -= 2; + } + while (w != p); + WOP_KEY (AES_D, 1) + WOP_KEY (AES_XOR, 0) + MM_XOR (m0, iv); + WOP_M1 (XOR_data_M1) + iv = data[NUM_WAYS - 1]; + WOP (STORE_data); + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = wStart; + v128 m = *data; + AES_D_IMC_m (w[2]) + do + { + AES_D_IMC_m (w[1]); + AES_D_IMC_m (w[0]); + w -= 2; + } + while (w != p); + AES_D_m (w[1]); + MM_XOR_m (w[0]); + MM_XOR_m (iv); + iv = *data; + *data = m; + } + + p[-2] = iv; } -void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) + +AES_FUNC_START2 (AesCtr_Code_HW) { - AesCtr_Code(p, data, numBlocks); + uint64x2_t ctr = vreinterpretq_u64_u8(*p); + const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; + const v128 *dataEnd; + uint64x2_t one = vdupq_n_u64(0); + one = vsetq_lane_u64(1, one, 0); + p += 2; + + WIDE_LOOP_START + { + const v128 *w = p; + WOP (DECLARE_VAR) + WOP (CTR_START); + do + { + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E_MC, 1) + w += 2; + } + while (w != wEnd); + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E, 1) + WOP_KEY (AES_XOR, 2) + WOP (CTR_END); + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = p; + v128 m; + CTR_START (m, 0); + do + { + AES_E_MC_m (w[0]); + AES_E_MC_m (w[1]); + w += 2; + } + while (w != wEnd); + AES_E_MC_m (w[0]); + AES_E_m (w[1]); + MM_XOR_m (w[2]); + CTR_END (m, 0); + } + + p[-2] = vreinterpretq_u8_u64(ctr); } -#endif +#endif // USE_HW_AES + +#endif // MY_CPU_ARM_OR_ARM64 diff --git a/deps/LZMA-SDK/C/Alloc.c b/deps/LZMA-SDK/C/Alloc.c index 30b499e5f..9f249c250 100644 --- a/deps/LZMA-SDK/C/Alloc.c +++ b/deps/LZMA-SDK/C/Alloc.c @@ -1,12 +1,12 @@ /* Alloc.c -- Memory allocation functions -2018-04-27 : Igor Pavlov : Public domain */ +2020-10-29 : Igor Pavlov : Public domain */ #include "Precomp.h" #include #ifdef _WIN32 -#include +#include #endif #include @@ -122,7 +122,6 @@ static void PrintAddr(void *p) #define Print(s) #define PrintLn() #define PrintHex(v, align) -#define PrintDec(v, align) #define PrintAddr(p) #endif @@ -133,10 +132,11 @@ void *MyAlloc(size_t size) { if (size == 0) return NULL; + PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); #ifdef _SZ_ALLOC_DEBUG { void *p = malloc(size); - PRINT_ALLOC("Alloc ", g_allocCount, size, p); + // PRINT_ALLOC("Alloc ", g_allocCount, size, p); return p; } #else @@ -172,14 +172,20 @@ void MidFree(void *address) VirtualFree(address, 0, MEM_RELEASE); } -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES +#ifdef _7ZIP_LARGE_PAGES + +#ifdef MEM_LARGE_PAGES + #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES +#else + #define MY__MEM_LARGE_PAGES 0x20000000 #endif -#ifdef _7ZIP_LARGE_PAGES +extern +SIZE_T g_LargePageSize; SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); -#endif +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); + +#endif // _7ZIP_LARGE_PAGES void SetLargePageSize() { @@ -214,7 +220,7 @@ void *BigAlloc(size_t size) size2 = (size + ps) & ~ps; if (size2 >= size) { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); if (res) return res; } @@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; */ #define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) -#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) - -#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32) +#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) #define USE_posix_memalign #endif +#ifndef USE_posix_memalign +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) +#endif + /* This posix_memalign() is for test purposes only. We also need special Free() function instead of free(), diff --git a/deps/LZMA-SDK/C/Alloc.h b/deps/LZMA-SDK/C/Alloc.h index 3d796e5ee..a1bbe942c 100644 --- a/deps/LZMA-SDK/C/Alloc.h +++ b/deps/LZMA-SDK/C/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2018-02-19 : Igor Pavlov : Public domain */ +2021-02-08 : Igor Pavlov : Public domain */ #ifndef __COMMON_ALLOC_H #define __COMMON_ALLOC_H @@ -13,7 +13,7 @@ void MyFree(void *address); #ifdef _WIN32 -void SetLargePageSize(); +void SetLargePageSize(void); void *MidAlloc(size_t size); void MidFree(void *address); diff --git a/deps/LZMA-SDK/C/Bcj2.c b/deps/LZMA-SDK/C/Bcj2.c index da93985cf..c1772f234 100644 --- a/deps/LZMA-SDK/C/Bcj2.c +++ b/deps/LZMA-SDK/C/Bcj2.c @@ -1,5 +1,5 @@ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) -2018-04-28 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; const Byte *srcLim; Byte *dest; - SizeT num = p->lims[BCJ2_STREAM_MAIN] - src; + SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); if (num == 0) { @@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) dest = p->dest; if (num > (SizeT)(p->destLim - dest)) { - num = p->destLim - dest; + num = (SizeT)(p->destLim - dest); if (num == 0) { p->state = BCJ2_DEC_STATE_ORIG; @@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) break; } - num = src - p->bufs[BCJ2_STREAM_MAIN]; + num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); if (src == srcLim) { @@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) p->ip += 4; val -= p->ip; dest = p->dest; - rem = p->destLim - dest; + rem = (SizeT)(p->destLim - dest); if (rem < 4) { diff --git a/deps/LZMA-SDK/C/Bcj2Enc.c b/deps/LZMA-SDK/C/Bcj2Enc.c index 7a02ecde2..71ac5091d 100644 --- a/deps/LZMA-SDK/C/Bcj2Enc.c +++ b/deps/LZMA-SDK/C/Bcj2Enc.c @@ -1,5 +1,5 @@ /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) const Byte *src = p->src; const Byte *srcLim; Byte *dest; - SizeT num = p->srcLim - src; + SizeT num = (SizeT)(p->srcLim - src); if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) { @@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) dest = p->bufs[BCJ2_STREAM_MAIN]; if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) { - num = p->lims[BCJ2_STREAM_MAIN] - dest; + num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); if (num == 0) { p->state = BCJ2_STREAM_MAIN; @@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) break; } - num = src - p->src; + num = (SizeT)(src - p->src); if (src == srcLim) { diff --git a/deps/LZMA-SDK/C/Bra.c b/deps/LZMA-SDK/C/Bra.c index cbdcb290d..cdefa4d2e 100644 --- a/deps/LZMA-SDK/C/Bra.c +++ b/deps/LZMA-SDK/C/Bra.c @@ -1,5 +1,5 @@ /* Bra.c -- Converters for RISC code -2017-04-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; /* if ((v & 0xFC000003) == 0x48000001) */ if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) @@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); /* v = GetBe32(p); p += 4; diff --git a/deps/LZMA-SDK/C/Bra86.c b/deps/LZMA-SDK/C/Bra86.c index a6463c63b..d857dac67 100644 --- a/deps/LZMA-SDK/C/Bra86.c +++ b/deps/LZMA-SDK/C/Bra86.c @@ -1,5 +1,5 @@ /* Bra86.c -- Converter for x86 code (BCJ) -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding break; { - SizeT d = (SizeT)(p - data - pos); + SizeT d = (SizeT)(p - data) - pos; pos = (SizeT)(p - data); if (p >= limit) { diff --git a/deps/LZMA-SDK/C/Compiler.h b/deps/LZMA-SDK/C/Compiler.h index c788648cd..eba374298 100644 --- a/deps/LZMA-SDK/C/Compiler.h +++ b/deps/LZMA-SDK/C/Compiler.h @@ -1,9 +1,13 @@ /* Compiler.h -2017-04-03 : Igor Pavlov : Public domain */ +2021-01-05 : Igor Pavlov : Public domain */ #ifndef __7Z_COMPILER_H #define __7Z_COMPILER_H + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wunused-private-field" + #endif + #ifdef _MSC_VER #ifdef UNDER_CE @@ -25,6 +29,12 @@ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information #endif + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" + // #pragma clang diagnostic ignored "-Wreserved-id-macro" + #endif + #endif #define UNUSED_VAR(x) (void)x; diff --git a/deps/LZMA-SDK/C/CpuArch.c b/deps/LZMA-SDK/C/CpuArch.c index ff1890e7f..bcdbc4cc1 100644 --- a/deps/LZMA-SDK/C/CpuArch.c +++ b/deps/LZMA-SDK/C/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code -2018-02-18: Igor Pavlov : Public domain */ +2021-04-28 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag) #define CHECK_CPUID_IS_SUPPORTED #endif +#ifndef USE_ASM + #ifdef _MSC_VER + #if _MSC_VER >= 1600 + #define MY__cpuidex __cpuidex + #else + +/* + __cpuid (function == 4) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) doesn't clear ECX + We still can use __cpuid for low (function) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FAST_CALL / NO_INLINE function, + So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + + DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +*/ + +static +MY_NO_INLINE +void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +{ + UNUSED_VAR(subFunction); + __cpuid(CPUInfo, function); +} + + #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) + #pragma message("======== MY__cpuidex_HACK WAS USED ========") + #endif + #else + #define MY__cpuidex(info, func, func2) __cpuid(info, func) + #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") + #endif +#endif + + + + void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) { #ifdef USE_ASM @@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) #endif "=c" (*c) , "=d" (*d) - : "0" (function)) ; + : "0" (function), "c"(0) ) ; #endif #else int CPUInfo[4]; - __cpuid(CPUInfo, function); - *a = CPUInfo[0]; - *b = CPUInfo[1]; - *c = CPUInfo[2]; - *d = CPUInfo[3]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; #endif } @@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder() } #if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include +#include static BoolInt CPU_Sys_Is_SSE_Supported() { OSVERSIONINFO vi; @@ -188,13 +231,77 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #define CHECK_SYS_SSE_SUPPORT #endif -BoolInt CPU_Is_Aes_Supported() + +static UInt32 X86_CPUID_ECX_Get_Flags() { Cx86cpuid p; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES() +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3() +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41() +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return False; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_VAES_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) return False; - return (p.c >> 25) & 1; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } } BoolInt CPU_IsSupported_PageGB() @@ -215,4 +322,117 @@ BoolInt CPU_IsSupported_PageGB() } } + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32() + { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() + { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +BoolInt CPU_IsSupported_CRC32(void) +{ + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1) + return 1; + return 0; +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; } +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + int res = My_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + #endif diff --git a/deps/LZMA-SDK/C/CpuArch.h b/deps/LZMA-SDK/C/CpuArch.h index 5f74c1c0c..e1cde536d 100644 --- a/deps/LZMA-SDK/C/CpuArch.h +++ b/deps/LZMA-SDK/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2018-02-18 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN. If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. + MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ #if defined(_M_X64) \ @@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_AMD64 #ifdef __ILP32__ #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT #endif @@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__i386__) #define MY_CPU_X86 #define MY_CPU_NAME "x86" - #define MY_CPU_32BIT + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__THUMBEL__) \ || defined(__THUMBEB__) #define MY_CPU_ARM - #define MY_CPU_NAME "arm" - #define MY_CPU_32BIT + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #if defined(__ppc64__) \ - || defined(__powerpc64__) + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) #ifdef __ILP32__ #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT -#elif defined(__ppc__) \ - || defined(__powerpc__) +#else #define MY_CPU_NAME "ppc" - #define MY_CPU_32BIT + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif #endif @@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_X86_OR_AMD64 #endif +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + #ifdef _WIN32 @@ -170,6 +199,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #error Stop_Compiling_Bad_32_64_BIT #endif +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + // for XLC compiler: + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + #ifndef MY_CPU_NAME #ifdef MY_CPU_LE @@ -202,9 +266,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) -#define SetUi16(p, v) { *(UInt16 *)(p) = (v); } -#define SetUi32(p, v) { *(UInt32 *)(p) = (v); } -#define SetUi64(p, v) { *(UInt64 *)(p) = (v); } +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } #else @@ -242,7 +306,7 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY__has_builtin(x) 0 #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300) +#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) /* Note: we use bswap instruction, that is unsupported in 386 cpu */ @@ -253,8 +317,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #pragma intrinsic(_byteswap_uint64) /* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) +#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) @@ -262,9 +326,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p)) +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) @@ -325,10 +389,35 @@ int x86cpuid_GetFirm(const Cx86cpuid *p); #define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) #define x86cpuid_GetStepping(ver) (ver & 0xF) -BoolInt CPU_Is_InOrder(); -BoolInt CPU_Is_Aes_Supported(); -BoolInt CPU_IsSupported_PageGB(); +BoolInt CPU_Is_InOrder(void); + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif +#if defined(__APPLE__) +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/deps/LZMA-SDK/C/Delta.c b/deps/LZMA-SDK/C/Delta.c index 6cbbe4601..fc7e9fe96 100644 --- a/deps/LZMA-SDK/C/Delta.c +++ b/deps/LZMA-SDK/C/Delta.c @@ -1,5 +1,5 @@ /* Delta.c -- Delta converter -2009-05-26 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,53 +12,158 @@ void Delta_Init(Byte *state) state[i] = 0; } -static void MyMemCpy(Byte *dest, const Byte *src, unsigned size) -{ - unsigned i; - for (i = 0; i < size; i++) - dest[i] = src[i]; -} void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + + { + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do + { + Byte b = *data; + *data++ = (Byte)(b - temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + { - SizeT i; - for (i = 0; i < size;) + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } { - for (j = 0; j < delta && i < size; i++, j++) + const Byte *lim = data + delta; + ptrdiff_t dif = -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) { - Byte b = data[i]; - data[i] = (Byte)(b - buf[j]); - buf[j] = b; + --p; *p = (Byte)(*p - p[dif]); } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - j); - MyMemCpy(state + delta - j, buf, j); } + void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) + { + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else { - SizeT i; - for (i = 0; i < size;) + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) + { + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) { - for (j = 0; j < delta && i < size; i++, j++) + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do + { + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + { - buf[j] = data[i] = (Byte)(buf[j] + data[i]); + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; } } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - j); - MyMemCpy(state + delta - j, buf, j); + + do + *state++ = *data; + while (++data != lim); } diff --git a/deps/LZMA-SDK/C/DllSecur.c b/deps/LZMA-SDK/C/DllSecur.c index 19a22a9f0..b0a555c49 100644 --- a/deps/LZMA-SDK/C/DllSecur.c +++ b/deps/LZMA-SDK/C/DllSecur.c @@ -5,7 +5,7 @@ #ifdef _WIN32 -#include +#include #include "DllSecur.h" diff --git a/deps/LZMA-SDK/C/DllSecur.h b/deps/LZMA-SDK/C/DllSecur.h index 4c113568e..0fd8070e5 100644 --- a/deps/LZMA-SDK/C/DllSecur.h +++ b/deps/LZMA-SDK/C/DllSecur.h @@ -10,8 +10,8 @@ EXTERN_C_BEGIN #ifdef _WIN32 -void My_SetDefaultDllDirectories(); -void LoadSecurityDlls(); +void My_SetDefaultDllDirectories(void); +void LoadSecurityDlls(void); #endif diff --git a/deps/LZMA-SDK/C/LzFind.c b/deps/LZMA-SDK/C/LzFind.c index 4eefc17dd..18ec00ef5 100644 --- a/deps/LZMA-SDK/C/LzFind.c +++ b/deps/LZMA-SDK/C/LzFind.c @@ -1,10 +1,11 @@ /* LzFind.c -- Match finder for LZ algorithms -2018-07-08 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include +#include "CpuArch.h" #include "LzFind.h" #include "LzHash.h" @@ -14,7 +15,45 @@ #define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) #define kMaxHistorySize ((UInt32)7 << 29) -#define kStartMaxLen 3 +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +#define kFix5HashSize kFix4HashSize + +/* + HASH2_CALC: + if (hv) match, then cur[0] and cur[1] also match +*/ +#define HASH2_CALC hv = GetUi16(cur); + +// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255] + +/* + HASH3_CALC: + if (cur[0]) and (h2) match, then cur[1] also match + if (cur[0]) and (hv) match, then cur[1] and cur[2] also match +*/ +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ + /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ + hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } + +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { @@ -44,9 +83,9 @@ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr return (p->bufferBase != NULL); } -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } +static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) { @@ -77,7 +116,7 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); + size_t size = (size_t)(p->bufferBase + p->blockSize - dest); if (size == 0) return; @@ -204,10 +243,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 hs; p->matchMaxLen = matchMaxLen; { + // UInt32 hs4; p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else + hs = (1 << 16) - 1; + if (p->numHashBytes != 2) { hs = historySize; if (hs > p->expectedDataSize) @@ -218,9 +257,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (hs >> 2); hs |= (hs >> 4); hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) + if (hs >= (1 << 24)) { if (p->numHashBytes == 3) hs = (1 << 24) - 1; @@ -228,12 +267,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs >>= 1; /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ } + + // hs = ((UInt32)1 << 25) - 1; // for test + + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; } p->hashMask = hs; hs++; + + /* + hs4 = (1 << 20); + if (hs4 > hs) + hs4 = hs; + // hs4 = (1 << 16); // for test + p->hash4Mask = hs4 - 1; + */ + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; hs += p->fixedHashSize; } @@ -249,6 +306,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, numSons <<= 1; newSize = hs + numSons; + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + if (p->hash && p->numRefs == newSize) return 1; @@ -349,15 +410,23 @@ static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - size_t i; - for (i = 0; i < numItems; i++) + if (numItems == 0) + return; { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; + const CLzRef *lim = items + numItems - 1; + for (; items < lim; items += 2) + { + UInt32 v, m; + v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + v = items[1]; items[0] = m; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + items[1] = m; + } + if (items == lim) + { + UInt32 v, m; + v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + items[0] = m; + } } } @@ -429,8 +498,8 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos { ptrdiff_t diff; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - diff = (ptrdiff_t)0 - delta; - if (cur[maxLen] == cur[maxLen + diff]) + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { const Byte *c = cur; while (*c == c[diff]) @@ -588,15 +657,21 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET; + +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) + +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) #define SKIP_FOOTER \ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; #define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - d2; \ + ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ const Byte *c = cur + maxLen; \ const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ @@ -610,7 +685,7 @@ static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) curMatch = p->hash[hv]; p->hash[hv] = p->pos; offset = 0; - GET_MATCHES_FOOTER(offset, 1) + GET_MATCHES_FOOTER_BT(1) } UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) @@ -621,7 +696,7 @@ UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) curMatch = p->hash[hv]; p->hash[hv] = p->pos; offset = 0; - GET_MATCHES_FOOTER(offset, 2) + GET_MATCHES_FOOTER_BT(2) } static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) @@ -659,9 +734,10 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } } - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } + static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 h2, h3, d2, d3, pos; @@ -676,53 +752,61 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; + maxLen = 3; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + for (;;) { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[0] = 3; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[2 + 1] = d3 - 1; + offset = 4; + } + else + break; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + offset = 2; + } + else + break; - if (offset != 0) - { UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; if (maxLen == lenLimit) { SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } + break; } - if (maxLen < 3) - maxLen = 3; - - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -/* + static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; + UInt32 h2, h3, d2, d3, maxLen, offset, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -733,53 +817,49 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; + maxLen = 4; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + else + break; + } else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + offset = 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[(size_t)offset - 2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) @@ -787,14 +867,12 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } + break; } - - if (maxLen < 4) - maxLen = 4; - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -*/ + static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { @@ -816,27 +894,38 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; + maxLen = 3; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) + for (;;) { + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[0] = 3; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[2 + 1] = d3 - 1; + offset = 4; + } + else + break; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + offset = 2; + } + else + break; + UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; if (maxLen == lenLimit) @@ -844,20 +933,16 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 3) - maxLen = 3; - - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -/* + static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos + UInt32 h2, h3, d2, d3, maxLen, offset, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -865,56 +950,52 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) hash = p->hash; pos = p->pos; - + d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; + maxLen = 4; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + else + break; + } else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + offset = 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[(size_t)offset - 2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) @@ -922,16 +1003,12 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 4) - maxLen = 4; - - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -*/ + UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { @@ -940,11 +1017,11 @@ UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET + offset = 0; + GET_MATCHES_FOOTER_HC(2) } + static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do @@ -1006,12 +1083,11 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) while (--num != 0); } -/* static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { - UInt32 h2, h3, h4; + UInt32 h2, h3; UInt32 *hash; SKIP_HEADER(5) HASH5_CALC; @@ -1019,13 +1095,12 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = + // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; SKIP_FOOTER } while (--num != 0); } -*/ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { @@ -1046,27 +1121,26 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) while (--num != 0); } -/* static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { - UInt32 h2, h3, h4; + UInt32 h2, h3; UInt32 *hash; SKIP_HEADER(5) HASH5_CALC; hash = p->hash; - curMatch = hash + kFix5HashSize)[hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = + // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } -*/ + void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { @@ -1089,18 +1163,16 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { - /* if (p->numHashBytes <= 4) */ + if (p->numHashBytes <= 4) { vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; } - */ } else if (p->numHashBytes == 2) { @@ -1112,16 +1184,14 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } - else /* if (p->numHashBytes == 4) */ + else if (p->numHashBytes == 4) { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; } - */ } diff --git a/deps/LZMA-SDK/C/LzFind.h b/deps/LZMA-SDK/C/LzFind.h index c77added7..3e2248e7d 100644 --- a/deps/LZMA-SDK/C/LzFind.h +++ b/deps/LZMA-SDK/C/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2017-06-10 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H @@ -61,7 +61,7 @@ typedef struct _CMatchFinder && (!(p)->directInput || (p)->directInputRem == 0)) int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); void MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); diff --git a/deps/LZMA-SDK/C/LzFindMt.c b/deps/LZMA-SDK/C/LzFindMt.c index df32146f9..cb29a1eac 100644 --- a/deps/LZMA-SDK/C/LzFindMt.c +++ b/deps/LZMA-SDK/C/LzFindMt.c @@ -1,12 +1,64 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2018-12-29 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "LzHash.h" +#include "CpuArch.h" +#include "LzHash.h" #include "LzFindMt.h" +// #define LOG_ITERS + +#ifdef LOG_ITERS +#include +static UInt64 g_NumIters_Tree; +static UInt64 g_NumIters_Loop; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +#define kMtHashBlockSize (1 << 17) +#define kMtHashNumBlocks (1 << 1) +#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) + +#define kMtBtBlockSize (1 << 16) +#define kMtBtNumBlocks (1 << 4) +#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +/* + HASH functions: + We use raw 8/16 bits from a[1] and a[2], + xored with crc(a[0]) and crc(a[3]). + We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches. + our crc() function provides one-to-one correspondence for low 8-bit values: + (crc[0...0xFF] & 0xFF) <-> [0...0xFF] +*/ + +#define MT_HASH2_CALC \ + h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +/* +#define MT_HASH3_CALC__NO_2 { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define __MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; } + // (kHash4Size - 1); +*/ + + static void MtSync_Construct(CMtSync *p) { p->wasCreated = False; @@ -18,8 +70,11 @@ static void MtSync_Construct(CMtSync *p) Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); + p->affinity = 0; } + +MY_NO_INLINE static void MtSync_GetNextBlock(CMtSync *p) { if (p->needStart) @@ -81,8 +136,7 @@ static void MtSync_Destruct(CMtSync *p) p->exit = True; if (p->needStart) Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); + Thread_Wait_Close(&p->thread); } if (p->csWasInitialized) { @@ -103,6 +157,7 @@ static void MtSync_Destruct(CMtSync *p) static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) { + WRes wres; if (p->wasCreated) return SZ_OK; @@ -117,8 +172,12 @@ static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + + if (p->affinity != 0) + wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); + else + wres = Thread_Create(&p->thread, startAddress, obj); + RINOK_THREAD(wres); p->wasCreated = True; return SZ_OK; } @@ -131,23 +190,161 @@ static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, return res; } -void MtSync_Init(CMtSync *p) { p->needStart = True; } +// static void MtSync_Init(CMtSync *p) { p->needStart = True; } #define kMtMaxValForNormalize 0xFFFFFFFF +// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20)) -#define DEF_GetHeads2(name, v, action) \ - static void GetHeads ## name(const Byte *p, UInt32 pos, \ - UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ - { action; for (; numHeads != 0; numHeads--) { \ - const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } +#ifdef MY_CPU_LE_UNALIGN + #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) +#else + #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16)) +#endif + +#define GetHeads_DECL(name) \ + static void GetHeads ## name(const Byte *p, UInt32 pos, \ + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) + +#define GetHeads_LOOP(v) \ + for (; numHeads != 0; numHeads--) { \ + const UInt32 value = (v); \ + p++; \ + *heads++ = pos - hash[value]; \ + hash[value] = pos++; } + +#define DEF_GetHeads2(name, v, action) \ + GetHeads_DECL(name) { action \ + GetHeads_LOOP(v) } + #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) -DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) -DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) -DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) -/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ +DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask) +DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +// BT3 is not good for crc collisions for big hashMask values. + +/* +GetHeads_DECL(3b) +{ + UNUSED_VAR(hashMask); + UNUSED_VAR(crc); + { + const Byte *pLim = p + numHeads; + if (numHeads == 0) + return; + pLim--; + while (p < pLim) + { + UInt32 v1 = GetUi32(p); + UInt32 v0 = v1 & 0xFFFFFF; + UInt32 h0, h1; + p += 2; + v1 >>= 8; + h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; + h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; + heads += 2; + } + if (p == pLim) + { + UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); + *heads = pos - hash[v0]; + hash[v0] = pos; + } + } +} +*/ + +/* +GetHeads_DECL(4) +{ + unsigned sh = 0; + UNUSED_VAR(crc) + while ((hashMask & 0x80000000) == 0) + { + hashMask <<= 1; + sh++; + } + GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) +} +#define GetHeads4b GetHeads4 +*/ + +#define USE_GetHeads_LOCAL_CRC + +#ifdef USE_GetHeads_LOCAL_CRC + +GetHeads_DECL(4) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + // crc1[i] = rotlFixed(v, 8) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(4b) +{ + UInt32 crc0[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + crc0[i] = crc[i] & hashMask; + } + GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) +} + +GetHeads_DECL(5) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + UInt32 crc2[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(5b) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) +} + +#else + +DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) +DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) +DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) + +#endif + static void HashThreadFunc(CMatchFinderMt *mt) { @@ -244,11 +441,11 @@ static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) MY_NO_INLINE static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) + UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) { do { - UInt32 *_distances = ++distances; + UInt32 *_distances = ++d; UInt32 delta = *hash++; CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; @@ -258,14 +455,15 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz unsigned maxLen = (unsigned)_maxLen; /* - if (size > 1) + #define PREF_STEP 1 + if (size > PREF_STEP) { - UInt32 delta = *hash; + UInt32 delta = hash[PREF_STEP - 1]; if (delta < _cyclicBufferSize) { - UInt32 cyc1 = _cyclicBufferPos + 1; + size_t cyc1 = _cyclicBufferPos + PREF_STEP; CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1); - Byte b = *(cur + 1 - delta); + Byte b = *(cur + PREF_STEP - delta); _distances[0] = pair[0]; _distances[1] = b; } @@ -276,8 +474,9 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz *ptr0 = *ptr1 = kEmptyHashValue; } else - for(;;) + for (LOG_ITER(g_NumIters_Tree++);;) { + LOG_ITER(g_NumIters_Loop++); { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; @@ -292,8 +491,8 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz if (maxLen < len) { maxLen = len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; + *d++ = (UInt32)len; + *d++ = delta - 1; if (len == lenLimit) { UInt32 pair1 = pair[1]; @@ -333,39 +532,39 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz _cyclicBufferPos++; cur++; { - UInt32 num = (UInt32)(distances - _distances); + UInt32 num = (UInt32)(d - _distances); _distances[-1] = num; } } - while (distances < limit && --size != 0); + while (d < limit && --size != 0); *posRes = pos; - return distances; + return d; } #endif -static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) +static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) { UInt32 numProcessed = 0; UInt32 curPos = 2; UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2 - distances[1] = p->hashNumAvail; + d[1] = p->hashNumAvail; while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; + d[1] = numProcessed + p->hashNumAvail; if (p->hashNumAvail >= p->numHashBytes) continue; - distances[0] = curPos + p->hashNumAvail; - distances += curPos; + d[0] = curPos + p->hashNumAvail; + d += curPos; for (; p->hashNumAvail != 0; p->hashNumAvail--) - *distances++ = 0; + *d++ = 0; return; } { @@ -387,7 +586,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { - UInt32 *startDistances = distances + curPos; + UInt32 *startDistances = d + curPos; UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, startDistances + 1, p->numHashBytes - 1) - startDistances); @@ -401,9 +600,9 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) { UInt32 posRes; curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, - distances + limit, - size, &posRes) - distances); + d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, + size, &posRes) - d); p->hashBufPos += posRes - pos; cyclicBufferPos += posRes - pos; p->buffer += posRes - pos; @@ -420,7 +619,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) } } - distances[0] = curPos; + d[0] = curPos; } static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) @@ -448,7 +647,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) } } -void BtThreadFunc(CMatchFinderMt *mt) +static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; for (;;) @@ -491,6 +690,14 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) { MtSync_Destruct(&p->hashSync); MtSync_Destruct(&p->btSync); + + LOG_ITER( + printf("\nTree %9d * %7d iter = %9d sum \n", + (UInt32)(g_NumIters_Tree / 1000), + (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), + (UInt32)(g_NumIters_Loop / 1000) + )); + MatchFinderMt_FreeMem(p, alloc); } @@ -553,6 +760,7 @@ static void MatchFinderMt_Init(CMatchFinderMt *p) p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; + // p->hash4Mask = mf->hash4Mask; p->crc = mf->crc; p->son = mf->son; @@ -572,22 +780,24 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) /* p->MatchFinder->ReleaseStream(); */ } -static void MatchFinderMt_Normalize(CMatchFinderMt *p) -{ - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; -} +MY_NO_INLINE static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { - UInt32 blockIndex; + UInt32 blockIndex, k; + MtSync_GetNextBlock(&p->btSync); + blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; + k = blockIndex * kMtBtBlockSize; + p->btBufPosLimit = k + p->btBuf[k]; + p->btNumAvailBytes = p->btBuf[k + 1]; + p->btBufPos = k + 2; if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); + { + MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); + p->lzPos = p->historySize + 1; + } } static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) @@ -603,170 +813,289 @@ static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) return p->btNumAvailBytes; } -static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { - UInt32 h2, curMatch2; + UInt32 h2, c2; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + UInt32 m = p->lzPos; MT_HASH2_CALC - curMatch2 = hash[h2]; - hash[h2] = lzPos; + c2 = hash[h2]; + hash[h2] = m; - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; + *d++ = 2; + *d++ = m - c2 - 1; } - return distances; + return d; } -static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { - UInt32 h2, h3, curMatch2, curMatch3; + UInt32 h2, h3, c2, c3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + UInt32 m = p->lzPos; MT_HASH3_CALC - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) { - distances[0] = 3; - return distances + 2; + d[0] = 3; + return d + 2; } - distances[0] = 2; - distances += 2; + d[0] = 2; + d += 2; } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; + *d++ = 3; + *d++ = m - c3 - 1; } - return distances; + return d; } + +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; + /* -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static +UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) { - UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4; + UInt32 pos = p->btBufPos; + const UInt32 *bt = p->btBuf + pos; + UInt32 len = *bt++; + UInt32 matchMinPos; + const UInt32 *d_base = d; + UInt32 avail = p->btNumAvailBytes - 1; + p->btBufPos = pos + 1 + len; + + { + UInt32 temp1 = p->historySize; + p->btNumAvailBytes = avail; + + #define BT_HASH_BYTES_MAX 5 + + if (len != 0) + temp1 = bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 2)) + { + INCREASE_LZ_POS + return 0; + } + matchMinPos = p->lzPos - temp1; + } + + for (;;) + { + + UInt32 h2, h3, c2, c3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; - curMatch4 = (hash + kFix4HashSize)[h4]; + UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + d += 2; + break; + } + // else + { + d[0] = 2; + d += 2; + } + } + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + break; + } + + if (len != 0) + { + do + { + UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while ((len -= 2) != 0); + } + INCREASE_LZ_POS + return (UInt32)(d - d_base); +} +*/ + + +static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + // MT_HASH4_CALC + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + // c4 = (hash + kFix4HashSize)[h4]; - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; - (hash + kFix4HashSize)[h4] = lzPos; + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + // (hash + kFix4HashSize)[h4] = m; + + #define _USE_H2 - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + #ifdef _USE_H2 + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; - return distances + 2; + // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3; + // return d + 2; + + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + return d; } - distances[0] = 2; - distances += 2; + d[0] = 2; + d += 2; } + #endif - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) + d[1] = m - c3 - 1; + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) { - distances[0] = 4; - return distances + 2; + d[0] = 4; + return d + 2; } - distances[0] = 3; - distances += 2; + d[0] = 3; + d += 2; } - if (curMatch4 >= matchMinPos) + #ifdef _USE_H4 + if (c4 >= matchMinPos) if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] ) { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; + *d++ = 4; + *d++ = m - c4 - 1; } + #endif - return distances; + return d; } -*/ -#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; -static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) +static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) { - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; + const UInt32 *bt = p->btBuf + p->btBufPos; + UInt32 len = *bt++; p->btBufPos += 1 + len; p->btNumAvailBytes--; { UInt32 i; for (i = 0; i < len; i += 2) { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances[0] = v0; - distances[1] = v1; - distances += 2; + UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; } } INCREASE_LZ_POS return len; } -static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; + +static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + UInt32 pos = p->btBufPos; + const UInt32 *bt = p->btBuf + pos; + UInt32 len = *bt++; + UInt32 avail = p->btNumAvailBytes - 1; + p->btNumAvailBytes = avail; + p->btBufPos = pos + 1 + len; if (len == 0) { - /* change for bt5 ! */ - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + #define BT_HASH_BYTES_MAX 5 + if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) + len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d); } else { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32 *distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + /* + first match pair from BinTree: (match_len, match_dist), + (match_len >= numHashBytes). + MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) + */ + UInt32 *d2; + d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d); do { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances2[0] = v0; - distances2[1] = v1; - distances2 += 2; + UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d2[0] = v0; + d2[1] = v1; + d2 += 2; } while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); + len = (UInt32)(d2 - d); } INCREASE_LZ_POS return len; @@ -802,19 +1131,18 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) SKIP_FOOTER_MT } -/* static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(4) - UInt32 h2, h3, h4; - MT_HASH4_CALC - (hash + kFix4HashSize)[h4] = + UInt32 h2, h3 /*, h4 */; + MT_HASH3_CALC + // MT_HASH4_CALC + // (hash + kFix4HashSize)[h4] = (hash + kFix3HashSize)[h3] = hash[ h2] = p->lzPos; SKIP_FOOTER_MT } -*/ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) { @@ -832,22 +1160,23 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: - p->GetHeadsFunc = GetHeads3; + p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; - default: - /* case 4: */ + case 4: p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + + // it's fast inline version of GetMatches() + // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; - /* default: - p->GetHeadsFunc = GetHeads5; + p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads5b : GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; break; - */ } } diff --git a/deps/LZMA-SDK/C/LzFindMt.h b/deps/LZMA-SDK/C/LzFindMt.h index fdd17008c..888c787cb 100644 --- a/deps/LZMA-SDK/C/LzFindMt.h +++ b/deps/LZMA-SDK/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2018-07-04 : Igor Pavlov : Public domain */ +2019-11-05 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H @@ -9,14 +9,6 @@ EXTERN_C_BEGIN -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - typedef struct _CMtSync { BoolInt wasCreated; @@ -34,6 +26,7 @@ typedef struct _CMtSync BoolInt csWasEntered; CCriticalSection cs; UInt32 numProcessedBlocks; + UInt64 affinity; } CMtSync; typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); @@ -56,11 +49,12 @@ typedef struct _CMatchFinderMt UInt32 *hash; UInt32 fixedHashSize; + // UInt32 hash4Mask; UInt32 historySize; const UInt32 *crc; Mf_Mix_Matches MixMatchesFunc; - + /* LZ + BT */ CMtSync btSync; Byte btDummy[kMtCacheLineDummy]; diff --git a/deps/LZMA-SDK/C/LzHash.h b/deps/LZMA-SDK/C/LzHash.h index 219144407..a682f83be 100644 --- a/deps/LZMA-SDK/C/LzHash.h +++ b/deps/LZMA-SDK/C/LzHash.h @@ -1,57 +1,34 @@ /* LzHash.h -- HASH functions for LZ algorithms -2015-04-12 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZ_HASH_H #define __LZ_HASH_H +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) +// #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - temp ^= (p->crc[cur[3]] << 5); \ - h4 = temp & (kHash4Size - 1); \ - hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } - -/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ + +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 #endif diff --git a/deps/LZMA-SDK/C/Lzma2Dec.c b/deps/LZMA-SDK/C/Lzma2Dec.c index 2e631051b..f9f98095d 100644 --- a/deps/LZMA-SDK/C/Lzma2Dec.c +++ b/deps/LZMA-SDK/C/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p) LzmaDec_Init(&p->decoder); } -static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) { switch (p->state) { diff --git a/deps/LZMA-SDK/C/Lzma2DecMt.c b/deps/LZMA-SDK/C/Lzma2DecMt.c index 87d5567ad..252b5be49 100644 --- a/deps/LZMA-SDK/C/Lzma2DecMt.c +++ b/deps/LZMA-SDK/C/Lzma2DecMt.c @@ -1,25 +1,25 @@ /* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" // #define SHOW_DEBUG_INFO +// #define _7ZIP_ST + #ifdef SHOW_DEBUG_INFO #include #endif +#ifndef _7ZIP_ST #ifdef SHOW_DEBUG_INFO #define PRF(x) x #else #define PRF(x) #endif - #define PRF_STR(s) PRF(printf("\n" s "\n")) -#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) #define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2)) - -// #define _7ZIP_ST +#endif #include "Alloc.h" @@ -28,10 +28,10 @@ #ifndef _7ZIP_ST #include "MtDec.h" -#endif - #define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28) +#endif + void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) { @@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa const unsigned kNumAlignBits = 12; const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */ t->alloc.numAlignBits = kNumAlignBits; - t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits)); + t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits)); t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc; } } @@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex, static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, BoolInt *needContinue, BoolInt *canRecode) { CLzma2DecMt *me = (CLzma2DecMt *)pp; @@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, const Byte *data = t->outBuf; BoolInt needContinue2 = True; + UNUSED_VAR(src) + UNUSED_VAR(srcSize) + UNUSED_VAR(isCross) + PRF_STR_INT_2("Write", coderIndex, srcSize); *needContinue = False; *canRecode = True; - UNUSED_VAR(src) - UNUSED_VAR(srcSize) if ( // t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK @@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p inPos = 0; inLim = p->inBufSize; inData = p->inBuf; - p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim); + p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim); // p->readProcessed += inLim; // inLim -= 5; p->readWasFinished = True; // for test if (inLim == 0 || p->readRes != SZ_OK) @@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, p->inProcessed = 0; p->readWasFinished = False; + p->readRes = SZ_OK; *isMT = False; @@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, if (p->props.numThreads > 1) { - IMtDecCallback vt; + IMtDecCallback2 vt; Lzma2DecMt_FreeSt(p); @@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, *inProcessed = p->inProcessed; // res = SZ_OK; // for test - if (res == SZ_OK && p->readRes != SZ_OK) + if (res == SZ_ERROR_INPUT_EOF) + { + if (p->readRes != SZ_OK) + res = p->readRes; + } + else if (res == SZ_OK && p->readRes != SZ_OK) res = p->readRes; /* diff --git a/deps/LZMA-SDK/C/Lzma2Enc.c b/deps/LZMA-SDK/C/Lzma2Enc.c index d54147752..c8b114cb4 100644 --- a/deps/LZMA-SDK/C/Lzma2Enc.c +++ b/deps/LZMA-SDK/C/Lzma2Enc.c @@ -1,5 +1,5 @@ /* Lzma2Enc.c -- LZMA2 Encoder -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) numBlocks++; if (numBlocks < (unsigned)t2) { - t2r = (unsigned)numBlocks; + t2r = (int)numBlocks; if (t2r == 0) t2r = 1; t3 = t1 * t2r; @@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1( { if (outBuf) { - size_t destPos = *outBufSize; + const size_t destPos = *outBufSize; if (destPos >= outLim) return SZ_ERROR_OUTPUT_EOF; - outBuf[destPos] = 0; + outBuf[destPos] = LZMA2_CONTROL_EOF; // 0 *outBufSize = destPos + 1; } else { - Byte b = 0; + const Byte b = LZMA2_CONTROL_EOF; // 0; if (ISeqOutStream_Write(outStream, &b, 1) != 1) return SZ_ERROR_WRITE; } @@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, p->outBufSize = destBlockSize; } - p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max; + p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max; p->mtCoder.expectedDataSize = p->expectedDataSize; { SRes res = MtCoder_Code(&p->mtCoder); if (!outStream) - *outBufSize = p->outBuf - outBuf; + *outBufSize = (size_t)(p->outBuf - outBuf); return res; } } diff --git a/deps/LZMA-SDK/C/Lzma86Enc.c b/deps/LZMA-SDK/C/Lzma86Enc.c index 8d35e6dc5..99397bc5e 100644 --- a/deps/LZMA-SDK/C/Lzma86Enc.c +++ b/deps/LZMA-SDK/C/Lzma86Enc.c @@ -11,8 +11,6 @@ #include "Bra.h" #include "LzmaEnc.h" -#define SZE_OUT_OVERFLOW SZE_DATA_ERROR - int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, int level, UInt32 dictSize, int filterMode) { diff --git a/deps/LZMA-SDK/C/LzmaDec.c b/deps/LZMA-SDK/C/LzmaDec.c index 4d1576419..80b70a9ee 100644 --- a/deps/LZMA-SDK/C/LzmaDec.c +++ b/deps/LZMA-SDK/C/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -13,10 +13,12 @@ #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 #define RC_INIT_SIZE 5 +#ifndef _LZMA_DEC_OPT + +#define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) @@ -62,9 +64,10 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) +#endif // _LZMA_DEC_OPT -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; @@ -114,6 +117,9 @@ #define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + /* External ASM code needs same CLzmaProb array layout. So don't change it. */ /* (probs_1664) is faster and better for code size at some platforms */ @@ -166,10 +172,12 @@ /* p->remainLen : shows status of LZMA decoder: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : need init range coder - = kMatchSpecLenStart + 2 : need init range coder and state + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error */ /* ---------- LZMA_DECODE_REAL ---------- */ @@ -188,23 +196,31 @@ In: { LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol - is not END_OF_PAYALOAD_MARKER, then function returns error code. + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. } Processing: - first LZMA symbol will be decoded in any case - All checks for limits are at the end of main loop, - It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. Out: RangeCoder is normalized Result: SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined */ @@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit else { UPDATE_1(prob); - /* - // that case was checked before with kBadRepCode - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - */ prob = probs + IsRepG0 + state; IF_BIT_0(prob) { @@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { UPDATE_0(prob); + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; processedPos++; @@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; } } @@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit if ((rem = limit - dicPos) == 0) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; } curLen = ((rem < len) ? (unsigned)rem : len); @@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->buf = buf; p->range = range; p->code = code; - p->remainLen = (UInt32)len; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. p->dicPos = dicPos; p->processedPos = processedPos; p->reps[0] = rep0; @@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->reps[2] = rep2; p->reps[3] = rep3; p->state = (UInt32)state; - + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; return SZ_OK; } #endif + + static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; { - Byte *dic = p->dic; SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = (unsigned)p->remainLen; - SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ - SizeT rem = limit - dicPos; - if (rem < len) - len = (unsigned)(rem); + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) p->checkDicSize = p->prop.dicSize; p->processedPos += (UInt32)len; p->remainLen -= (UInt32)len; - while (len != 0) + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do { - len--; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; } + while (--len); p->dicPos = dicPos; } } +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + #define kRange0 0xFFFFFFFF #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) #define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) @@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) #error Stop_Compiling_Bad_LZMA_Check #endif + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). + +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { - do + if (p->checkDicSize == 0) { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - - if (p->processedPos == 0) - if (p->code >= kBadRepCode) - return SZ_ERROR_DATA; - } - - RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); - + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; - - LzmaDec_WriteRem(p, limit); + return res; } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - return 0; } + + typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_INPUT_EOF, /* need more input data */ DUMMY_LIT, DUMMY_MATCH, DUMMY_REP } ELzmaDummy; -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) { UInt32 range = p->range; UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; + const Byte *bufLimit = *bufOut; const CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; ELzmaDummy res; + for (;;) { const CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - prob = probs + Literal; if (p->checkDicSize != 0 || p->processedPos != 0) prob += ((UInt32)LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); if (state < kNumLitStates) { @@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; + break; } else { @@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned numDirectBits = ((posSlot >> 1) - 1); - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - if (posSlot < kEndPosModelIndex) { prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); @@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS } } } + break; } NORMALIZE_CHECK; + + *bufOut = buf; return res; } - +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) { p->remainLen = kMatchSpecLenStart + 1; @@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p) } +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. +When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN__NOT_FINISHED__FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; if (p->remainLen > kMatchSpecLenStart) { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA; + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) p->tempBuf[p->tempBufSize++] = *src++; if (p->tempBufSize != 0 && p->tempBuf[0] != 0) @@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr | ((UInt32)p->tempBuf[2] << 16) | ((UInt32)p->tempBuf[3] << 8) | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + p->range = 0xFFFFFFFF; p->tempBufSize = 0; @@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr p->remainLen = 0; } - LzmaDec_WriteRem(p, dicLimit); - - while (p->remainLen != kMatchSpecLenStart) + for (;;) { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + int checkEndMarkNow = 0; if (p->dicPos >= dicLimit) @@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + RETURN__NOT_FINISHED__FOR_FINISH; } checkEndMarkNow = 1; } + // (p->remainLen == 0) + if (p->tempBufSize == 0) { - SizeT processed; const Byte *bufLimit; + int dummyProcessed = -1; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } + bufLimit = src; + // we will decode only one iteration } else bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; } - else + { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - (*srcLen) += (SizeT)lookAhead; + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } } + p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; { - unsigned kkk = (unsigned)(p->buf - p->tempBuf); - if (rem < kkk) - return SZ_ERROR_FAIL; /* some internal error */ - rem -= kkk; - if (lookAhead < rem) - return SZ_ERROR_FAIL; /* some internal error */ - lookAhead -= rem; + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } } - (*srcLen) += (SizeT)lookAhead; - src += lookAhead; - inSize -= (SizeT)lookAhead; - p->tempBufSize = 0; } + } } - - if (p->code != 0) - return SZ_ERROR_DATA; - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return SZ_OK; + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; } + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; diff --git a/deps/LZMA-SDK/C/LzmaDec.h b/deps/LZMA-SDK/C/LzmaDec.h index 28ce60c3e..6194b7d12 100644 --- a/deps/LZMA-SDK/C/LzmaDec.h +++ b/deps/LZMA-SDK/C/LzmaDec.h @@ -1,5 +1,5 @@ /* LzmaDec.h -- LZMA Decoder -2018-04-21 : Igor Pavlov : Public domain */ +2020-03-19 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H @@ -181,6 +181,7 @@ Returns: LZMA_STATUS_NEEDS_MORE_INPUT LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, @@ -223,6 +224,7 @@ Returns: SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, diff --git a/deps/LZMA-SDK/C/LzmaEnc.c b/deps/LZMA-SDK/C/LzmaEnc.c index 14086fc4f..86dcb1963 100644 --- a/deps/LZMA-SDK/C/LzmaEnc.c +++ b/deps/LZMA-SDK/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2019-01-10: Igor Pavlov : Public domain */ +2021-04-01: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -19,6 +19,19 @@ #include "LzFindMt.h" #endif +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); +void LzmaEnc_Finish(CLzmaEncHandle pp); +void LzmaEnc_SaveState(CLzmaEncHandle pp); +void LzmaEnc_RestoreState(CLzmaEncHandle pp); + #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; #endif @@ -36,7 +49,7 @@ static unsigned g_STAT_OFFSET = 0; #define kNumMoveReducingBits 4 #define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) +// #define kBitPrice (1 << kNumBitPriceShiftBits) #define REP_LEN_COUNT 64 @@ -47,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->writeEndMark = 0; + p->affinity = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -55,7 +69,13 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (level < 0) level = 5; p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); + if (p->dictSize == 0) + p->dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); + if (p->dictSize > p->reduceSize) { unsigned i; @@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) p->numThreads = @@ -93,7 +113,7 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) return props.dictSize; } -#if (_MSC_VER >= 1400) +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* BSR code is fast for some new CPUs */ /* #define LZMA_LOG_BSR */ #endif @@ -193,7 +213,7 @@ typedef struct #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 +// #define kDicLogSizeMin 0 #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) @@ -462,16 +482,16 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->dictSize = props.dictSize; { - unsigned fb = props.fb; + unsigned fb = (unsigned)props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; p->fastMode = (props.algo == 0); // p->_maxMode = True; p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); @@ -479,17 +499,17 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) unsigned numHashBytes = 4; if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; } + if (props.numHashBytes >= 5) numHashBytes = 5; + p->matchFinderBase.numHashBytes = numHashBytes; } p->matchFinderBase.cutValue = props.mc; - p->writeEndMark = props.writeEndMark; + p->writeEndMark = (BoolInt)props.writeEndMark; #ifndef _7ZIP_ST /* @@ -500,6 +520,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } */ p->multiThread = (props.numThreads > 1); + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; #endif return SZ_OK; @@ -536,8 +558,8 @@ static void RangeEnc_Construct(CRangeEnc *p) p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) -#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) @@ -578,7 +600,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) size_t num; if (p->res != SZ_OK) return; - num = p->buf - p->bufBase; + num = (size_t)(p->buf - p->bufBase); if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) p->res = SZ_ERROR_WRITE; p->processed += num; @@ -656,7 +678,7 @@ static void RangeEnc_FlushData(CRangeEnc *p) range += newBound & mask; \ mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ mask += ((1 << kNumMoveBits) - 1); \ - ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ *(prob) = (CLzmaProb)ttt; \ RC_NORM(p) \ } @@ -749,7 +771,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) bitCount++; } } - ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); // printf("\n%3d: %5d", i, ProbPrices[i]); } } @@ -1011,7 +1033,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; + ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; const Byte *lim = p1 + numAvail; for (; p2 != lim && *p2 == p2[dif]; p2++) {} @@ -2198,7 +2220,7 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) -void LzmaEnc_Construct(CLzmaEnc *p) +static void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); MatchFinder_Construct(&p->matchFinderBase); @@ -2233,7 +2255,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) return p; } -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->litProbs); ISzAlloc_Free(alloc, p->saveState.litProbs); @@ -2241,7 +2263,7 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) p->saveState.litProbs = NULL; } -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { #ifndef _7ZIP_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); @@ -2259,6 +2281,7 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) } +MY_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; @@ -2521,12 +2544,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa // { int y; for (y = 0; y < 100; y++) { FillDistancesPrices(p); // }} - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); } if (p->repLenEncCounter <= 0) { p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } } @@ -2611,7 +2634,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, return SZ_OK; } -void LzmaEnc_Init(CLzmaEnc *p) +static void LzmaEnc_Init(CLzmaEnc *p) { unsigned i; p->state = 0; @@ -2675,12 +2698,12 @@ void LzmaEnc_Init(CLzmaEnc *p) p->additionalOffset = 0; - p->pbMask = (1 << p->pb) - 1; + p->pbMask = ((unsigned)1 << p->pb) - 1; p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); } -void LzmaEnc_InitPrices(CLzmaEnc *p) +static void LzmaEnc_InitPrices(CLzmaEnc *p) { if (!p->fastMode) { @@ -2694,8 +2717,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) @@ -2788,12 +2811,13 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s } +/* UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } - +*/ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { @@ -2841,6 +2865,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } +MY_NO_INLINE static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) { SRes res = SZ_OK; @@ -2899,14 +2924,14 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) if (dictSize >= ((UInt32)1 << 22)) { - UInt32 kDictMask = ((UInt32)1 << 20) - 1; + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) dictSize = (dictSize + kDictMask) & ~kDictMask; } else for (i = 11; i <= 30; i++) { - if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } + if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; } + if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; } } for (i = 0; i < 4; i++) @@ -2917,7 +2942,7 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) { - return ((CLzmaEnc *)pp)->writeEndMark; + return (unsigned)((CLzmaEnc *)pp)->writeEndMark; } @@ -2974,3 +2999,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, LzmaEnc_Destroy(p, alloc, allocBig); return res; } + + +/* +#ifndef _7ZIP_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/deps/LZMA-SDK/C/LzmaEnc.h b/deps/LZMA-SDK/C/LzmaEnc.h index c9938f04b..26757ba6b 100644 --- a/deps/LZMA-SDK/C/LzmaEnc.h +++ b/deps/LZMA-SDK/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2017-07-27 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZMA_ENC_H #define __LZMA_ENC_H @@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/deps/LZMA-SDK/C/LzmaLib.h b/deps/LZMA-SDK/C/LzmaLib.h index 5c35e5365..4103e224a 100644 --- a/deps/LZMA-SDK/C/LzmaLib.h +++ b/deps/LZMA-SDK/C/LzmaLib.h @@ -1,5 +1,5 @@ /* LzmaLib.h -- LZMA library interface -2013-01-18 : Igor Pavlov : Public domain */ +2021-04-03 : Igor Pavlov : Public domain */ #ifndef __LZMA_LIB_H #define __LZMA_LIB_H @@ -40,14 +40,16 @@ outPropsSize - level - compression level: 0 <= level <= 9; level dictSize algo fb - 0: 16 KB 0 32 - 1: 64 KB 0 32 - 2: 256 KB 0 32 - 3: 1 MB 0 32 - 4: 4 MB 0 32 + 0: 64 KB 0 32 + 1: 256 KB 0 32 + 2: 1 MB 0 32 + 3: 4 MB 0 32 + 4: 16 MB 0 32 5: 16 MB 1 32 6: 32 MB 1 32 - 7+: 64 MB 1 64 + 7: 32 MB 1 64 + 8: 64 MB 1 64 + 9: 64 MB 1 64 The default value for "level" is 5. @@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes). numThreads - The number of thereads. 1 or 2. The default value is 2. Fast mode (algo = 0) can use only 1 thread. +In: + dest - output data buffer + destLen - output data buffer size + src - input data + srcLen - input data size Out: destLen - processed output size Returns: @@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char LzmaUncompress -------------- In: - dest - output data - destLen - output data size + dest - output data buffer + destLen - output data buffer size src - input data srcLen - input data size Out: diff --git a/deps/LZMA-SDK/C/MtCoder.c b/deps/LZMA-SDK/C/MtCoder.c index 5667f2d5b..85444f484 100644 --- a/deps/LZMA-SDK/C/MtCoder.c +++ b/deps/LZMA-SDK/C/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,7 +7,7 @@ #ifndef _7ZIP_ST -SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) +static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) { CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt); UInt64 inSize2 = 0; @@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t) { t->stop = 1; Event_Set(&t->startEvent); - Thread_Wait(&t->thread); - Thread_Close(&t->thread); + Thread_Wait_Close(&t->thread); } Event_Close(&t->startEvent); @@ -342,7 +341,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) for (;;) { if (Event_Wait(&t->startEvent) != 0) - return SZ_ERROR_THREAD; + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; if (t->stop) return 0; { @@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); if (numFinished == mtc->numStartedThreads) if (Event_Set(&mtc->finishedEvent) != 0) - return SZ_ERROR_THREAD; + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; } #endif } diff --git a/deps/LZMA-SDK/C/MtDec.c b/deps/LZMA-SDK/C/MtDec.c index 25a8b046d..24441b3a7 100644 --- a/deps/LZMA-SDK/C/MtDec.c +++ b/deps/LZMA-SDK/C/MtDec.c @@ -1,16 +1,21 @@ /* MtDec.c -- Multi-thread Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-27 : Igor Pavlov : Public domain */ #include "Precomp.h" // #define SHOW_DEBUG_INFO // #include +#include #ifdef SHOW_DEBUG_INFO #include #endif +#include "MtDec.h" + +#ifndef _7ZIP_ST + #ifdef SHOW_DEBUG_INFO #define PRF(x) x #else @@ -19,10 +24,6 @@ #define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) -#include "MtDec.h" - -#ifndef _7ZIP_ST - void MtProgress_Init(CMtProgress *p, ICompressProgress *progress) { p->progress = progress; @@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res) } -#define RINOK_THREAD(x) RINOK(x) +#define RINOK_THREAD(x) RINOK_WRes(x) static WRes ArEvent_OptCreate_And_Reset(CEvent *p) @@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t) { Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */ Event_Set(&t->canRead); - Thread_Wait(&t->thread); - Thread_Close(&t->thread); + Thread_Wait_Close(&t->thread); } Event_Close(&t->canRead); @@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t) Byte *afterEndData = NULL; size_t afterEndData_Size = 0; + BoolInt afterEndData_IsCross = False; BoolInt canCreateNewThread = False; // CMtDecCallbackInfo parse; CMtDecThread *nextThread; - PRF_STR_INT("Event_Wait(&t->canRead)", t->index); + PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index); RINOK_THREAD(Event_Wait(&t->canRead)); if (p->exitThread) @@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t) parse.srcFinished = finish; parse.canCreateNewThread = True; - // PRF(printf("\nParse size = %d\n", (unsigned)size)) + PRF(printf("\nParse size = %d\n", (unsigned)size)); p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse); + PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state)); + needWrite = True; canCreateNewThread = parse.canCreateNewThread; @@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t) if (parse.state == MTDEC_PARSE_END) { - p->crossStart = 0; - p->crossEnd = 0; - - if (crossSize != 0) - memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data - afterEndData_Size = size - parse.srcSize; afterEndData = parseData + parse.srcSize; - + afterEndData_Size = size - parse.srcSize; + if (crossSize != 0) + afterEndData_IsCross = True; // we reduce data size to required bytes (parsed only) - inDataSize -= (size - parse.srcSize); + inDataSize -= afterEndData_Size; if (!prev) inDataSize_Start = parse.srcSize; break; @@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t) { // p->inProcessed += inCodePos; + PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size)); + res = p->mtCallback->Write(p->mtCallbackObject, t->index, res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite - afterEndData, afterEndData_Size, + afterEndData, afterEndData_Size, afterEndData_IsCross, &needContinue, &canRecode); - - // res= E_INVALIDARG; // for test + + // res = SZ_ERROR_FAIL; // for test PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue)); PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed)); @@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp) res = ThreadFunc2(t); p = t->mtDec; if (res == 0) - return p->exitThreadWRes; + return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; { // it's unexpected situation for some threading function error if (p->exitThreadWRes == 0) @@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp) Event_Set(&p->threads[0].canWrite); MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res)); } - return res; + return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; } static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) { + #ifdef USE_ALLOCA CMtDecThread *t = (CMtDecThread *)pp; - // fprintf(stderr, "\n%d = %p - before", t->index, &t); - #ifdef USE_ALLOCA t->allocaPtr = alloca(t->index * 128); #endif return ThreadFunc1(pp); @@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p) { WRes wres; - WRes sres; + SRes sres; CMtDecThread *nextThread = &p->threads[p->numStartedThreads++]; // wres = MtDecThread_CreateAndStart(nextThread); wres = MtDecThread_CreateEvents(nextThread); if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canRead); - if (wres == 0) { wres = ThreadFunc(nextThread); + if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread); + wres = (WRes)(UINT_PTR)res; if (wres != 0) { p->needContinue = False; @@ -1130,8 +1131,8 @@ SRes MtDec_Code(CMtDec *p) return SZ_OK; // if (sres != SZ_OK) - return sres; - // return E_FAIL; + return sres; + // return SZ_ERROR_FAIL; } } diff --git a/deps/LZMA-SDK/C/MtDec.h b/deps/LZMA-SDK/C/MtDec.h index 9864cc874..7a30b6a9e 100644 --- a/deps/LZMA-SDK/C/MtDec.h +++ b/deps/LZMA-SDK/C/MtDec.h @@ -1,5 +1,5 @@ /* MtDec.h -- Multi-thread Decoder -2018-07-04 : Igor Pavlov : Public domain */ +2020-03-05 : Igor Pavlov : Public domain */ #ifndef __MT_DEC_H #define __MT_DEC_H @@ -108,11 +108,12 @@ typedef struct */ SRes (*Write)(void *p, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, // int srcFinished, BoolInt *needContinue, BoolInt *canRecode); -} IMtDecCallback; + +} IMtDecCallback2; @@ -132,7 +133,7 @@ typedef struct _CMtDec ICompressProgress *progress; ISzAllocPtr alloc; - IMtDecCallback *mtCallback; + IMtDecCallback2 *mtCallback; void *mtCallbackObject; diff --git a/deps/LZMA-SDK/C/Ppmd.h b/deps/LZMA-SDK/C/Ppmd.h index 4b9941521..ee93ecece 100644 --- a/deps/LZMA-SDK/C/Ppmd.h +++ b/deps/LZMA-SDK/C/Ppmd.h @@ -1,5 +1,5 @@ /* Ppmd.h -- PPMD codec common code -2017-04-03 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #ifndef __PPMD_H @@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ EXTERN_C_BEGIN -#ifdef MY_CPU_32BIT +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. + if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. +*/ #define PPMD_32BIT #endif @@ -28,7 +37,7 @@ EXTERN_C_BEGIN #define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) #define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) -#pragma pack(push, 1) +MY_CPU_pragma_pack_push_1 /* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ /* SEE-contexts for PPM-contexts with masked symbols */ @@ -40,41 +49,114 @@ typedef struct } CPpmd_See; #define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ - { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); } + { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); } + typedef struct { Byte Symbol; Byte Freq; - UInt16 SuccessorLow; - UInt16 SuccessorHigh; + UInt16 Successor_0; + UInt16 Successor_1; } CPpmd_State; -#pragma pack(pop) - -typedef - #ifdef PPMD_32BIT - CPpmd_State * - #else - UInt32 - #endif - CPpmd_State_Ref; - -typedef - #ifdef PPMD_32BIT - void * - #else - UInt32 - #endif - CPpmd_Void_Ref; - -typedef - #ifdef PPMD_32BIT - Byte * - #else - UInt32 - #endif - CPpmd_Byte_Ref; +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; + +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; + +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). + + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. +*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. +*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif + #define PPMD_SetAllBitsIn256Bytes(p) \ { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ diff --git a/deps/LZMA-SDK/C/Ppmd7.c b/deps/LZMA-SDK/C/Ppmd7.c index 80e7de9a6..b6ecf1430 100644 --- a/deps/LZMA-SDK/C/Ppmd7.c +++ b/deps/LZMA-SDK/C/Ppmd7.c @@ -1,5 +1,5 @@ /* Ppmd7.c -- PPMdH codec -2018-07-04 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Precomp.h" @@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Ppmd7.h" -const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; #define MAX_FREQ 124 @@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) -#define I2U(indx) (p->Indx2Units[indx]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) -#ifdef PPMD_32BIT - #define REF(ptr) (ptr) -#else - #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) -#endif +#define REF(ptr) Ppmd_GetRef(p, ptr) #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) @@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR; struct CPpmd7_Node_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Node_ * - #else - UInt32 - #endif - CPpmd7_Node_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; typedef struct CPpmd7_Node_ { @@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_ CPpmd7_Node_Ref Prev; } CPpmd7_Node; -#ifdef PPMD_32BIT - #define NODE(ptr) (ptr) -#else - #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs))) -#endif +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) void Ppmd7_Construct(CPpmd7 *p) { unsigned i, k, m; - p->Base = 0; + p->Base = NULL; for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) { @@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p) for (i = 0; i < 3; i++) p->NS2Indx[i] = (Byte)i; + for (m = i, k = 1; i < 256; i++) { p->NS2Indx[i] = (Byte)m; @@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p) k = (++m) - 2; } - memset(p->HB2Flag, 0, 0x40); - memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40); + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); } + void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Base); p->Size = 0; - p->Base = 0; + p->Base = NULL; } + BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) { if (!p->Base || p->Size != size) { - size_t size2; Ppmd7_Free(p, alloc); - size2 = 0 - #ifndef PPMD_32BIT - + UNIT_SIZE - #endif - ; - p->AlignOffset = - #ifdef PPMD_32BIT - (4 - size) & 3; - #else - 4 - (size & 3); - #endif - if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0) + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) return False; p->Size = size; } return True; } + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in GlueFreeBlocks(). + So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode() +*/ + +#define EMPTY_NODE 0 + + static void InsertNode(CPpmd7 *p, void *node, unsigned indx) { *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + p->FreeList[indx] = REF(node); + } + static void *RemoveNode(CPpmd7 *p, unsigned indx) { CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; return node; } + static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) { unsigned i, nu = I2U(oldIndx) - I2U(newIndx); @@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) InsertNode(p, ptr, i); } -static void GlueFreeBlocks(CPpmd7 *p) + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union _CPpmd7_Node_Union { - #ifdef PPMD_32BIT - CPpmd7_Node headItem; - CPpmd7_Node_Ref head = &headItem; - #else - CPpmd7_Node_Ref head = p->AlignOffset + p->Size; - #endif - - CPpmd7_Node_Ref n = head; - unsigned i; + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + +static void GlueFreeBlocks(CPpmd7 *p) +{ + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + p->GlueCount = 255; - /* create doubly-linked list of free blocks */ - for (i = 0; i < PPMD_NUM_INDEXES; i++) + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + { - UInt16 nu = I2U(i); - CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; - p->FreeList[i] = 0; - while (next != 0) + /* Create list of free blocks. + We still need one additional list walk pass before Glue. */ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) { - CPpmd7_Node *node = NODE(next); - node->Next = n; - n = NODE(n)->Prev = next; - next = *(const CPpmd7_Node_Ref *)node; - node->Stamp = 0; - node->NU = (UInt16)nu; + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } } } - NODE(head)->Stamp = 1; - NODE(head)->Next = n; - NODE(n)->Prev = head; - if (p->LoUnit != p->HiUnit) - ((CPpmd7_Node *)p->LoUnit)->Stamp = 1; - - /* Glue free blocks */ - while (n != head) + + head = n; + /* Glue and Fill must walk the list in same direction */ { - CPpmd7_Node *node = NODE(n); - UInt32 nu = (UInt32)node->NU; - for (;;) + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) { - CPpmd7_Node *node2 = NODE(n) + nu; - nu += node2->NU; - if (node2->Stamp != 0 || nu >= 0x10000) - break; - NODE(node2->Prev)->Next = node2->Next; - NODE(node2->Next)->Prev = node2->Prev; - node->NU = (UInt16)nu; + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } } - n = node->Next; } - + /* Fill lists of free blocks */ - for (n = NODE(head)->Next; n != head;) + for (n = head; n != 0;) { CPpmd7_Node *node = NODE(n); - unsigned nu; - CPpmd7_Node_Ref next = node->Next; - for (nu = node->NU; nu > 128; nu -= 128, node += 128) + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) InsertNode(p, node, PPMD_NUM_INDEXES - 1); if (I2U(i = U2I(nu)) != nu) { unsigned k = I2U(--i); - InsertNode(p, node + k, nu - k - 1); + InsertNode(p, node + k, (unsigned)nu - k - 1); } InsertNode(p, node, i); - n = next; } } + +MY_NO_INLINE static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) { unsigned i; - void *retVal; + if (p->GlueCount == 0) { GlueFreeBlocks(p); if (p->FreeList[indx] != 0) return RemoveNode(p, indx); } + i = indx; + do { if (++i == PPMD_NUM_INDEXES) { UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; p->GlueCount--; - return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL); + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL; } } while (p->FreeList[i] == 0); - retVal = RemoveNode(p, i); - SplitBlock(p, retVal, i, indx); - return retVal; + + { + void *block = RemoveNode(p, i); + SplitBlock(p, block, i, indx); + return block; + } } + static void *AllocUnits(CPpmd7 *p, unsigned indx) { - UInt32 numBytes; if (p->FreeList[indx] != 0) return RemoveNode(p, indx); - numBytes = U2B(I2U(indx)); - if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit)) { - void *retVal = p->LoUnit; - p->LoUnit += numBytes; - return retVal; + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } } return AllocUnitsRare(p, indx); } + #define MyMem12Cpy(dest, src, num) \ - { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \ - do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); } + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + +/* static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) { unsigned i0 = U2I(oldNU); @@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU SplitBlock(p, oldPtr, i0, i1); return oldPtr; } +*/ -#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16))) +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) { - (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF); - (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF); + Ppmd_SET_SUCCESSOR(p, v); } -static void RestartModel(CPpmd7 *p) + + +MY_NO_INLINE +static +void RestartModel(CPpmd7 *p) { - unsigned i, k, m; + unsigned i, k; memset(p->FreeList, 0, sizeof(p->FreeList)); + p->Text = p->Base + p->AlignOffset; p->HiUnit = p->Text + p->Size; p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; @@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p) p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1; p->PrevSuccess = 0; - p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ - p->MinContext->Suffix = 0; - p->MinContext->NumStats = 256; - p->MinContext->SummFreq = 256 + 1; - p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ - p->LoUnit += U2B(256 / 2); - p->MinContext->Stats = REF(p->FoundState); - for (i = 0; i < 256; i++) { - CPpmd_State *s = &p->FoundState[i]; - s->Symbol = (Byte)i; - s->Freq = 1; - SetSuccessor(s, 0); + CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif } for (i = 0; i < 128; i++) + + + for (k = 0; k < 8; k++) { + unsigned m; UInt16 *dest = p->BinSumm[i] + k; UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); for (m = 0; m < 64; m += 8) dest[m] = val; } - + + for (i = 0; i < 25; i++) - for (k = 0; k < 16; k++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) { - CPpmd_See *s = &p->See[i][k]; - s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4)); + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); s->Count = 4; } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ } + void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) { p->MaxOrder = maxOrder; + RestartModel(p); - p->DummySee.Shift = PPMD_PERIOD_BITS; - p->DummySee.Summ = 0; /* unused */ - p->DummySee.Count = 64; /* unused */ } -static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) + + +/* + CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. + + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +MY_NO_INLINE +static CTX_PTR CreateSuccessors(CPpmd7 *p) { - CPpmd_State upState; CTX_PTR c = p->MinContext; CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); - CPpmd_State *ps[PPMD7_MAX_ORDER]; + Byte newSym, newFreq; unsigned numPs = 0; - - if (!skip) + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) ps[numPs++] = p->FoundState; while (c->Suffix) @@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) CPpmd_Void_Ref successor; CPpmd_State *s; c = SUFFIX(c); + + if (c->NumStats != 1) { - for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++); + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + } else + { s = ONE_STATE(c); + + } successor = SUCCESSOR(s); if (successor != upBranch) { + // (c) is real record Context here, c = CTX(successor); if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. return c; + } break; } ps[numPs++] = s; } - upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch); - SetSuccessor(&upState, upBranch + 1); + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + if (c->NumStats == 1) - upState.Freq = ONE_STATE(c)->Freq; + newFreq = ONE_STATE(c)->Freq; else { UInt32 cf, s0; CPpmd_State *s; - for (s = STATS(c); s->Symbol != upState.Symbol; s++); - cf = s->Freq - 1; - s0 = c->SummFreq - c->NumStats - cf; - upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0)))); + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. + max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); } + // Create new single-symbol contexts from low order to high order in loop + do { - /* Create Child */ - CTX_PTR c1; /* = AllocContext(p); */ + CTX_PTR c1; + /* = AllocContext(p); */ if (p->HiUnit != p->LoUnit) - c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); + c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); else if (p->FreeList[0] != 0) c1 = (CTX_PTR)RemoveNode(p, 0); else @@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) if (!c1) return NULL; } + c1->NumStats = 1; - *ONE_STATE(c1) = upState; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); c1->Suffix = REF(c); SetSuccessor(ps[--numPs], REF(c1)); c = c1; @@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) return c; } -static void SwapStates(CPpmd_State *t1, CPpmd_State *t2) -{ - CPpmd_State tmp = *t1; - *t1 = *t2; - *t2 = tmp; -} -static void UpdateModel(CPpmd7 *p) + +#define SwapStates(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +MY_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) { - CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState); - CTX_PTR c; + CPpmd_Void_Ref maxSuccessor, minSuccessor; + CTX_PTR c, mc; unsigned s0, ns; - + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) { + /* Update Freqs in Suffix Context */ + c = SUFFIX(p->MinContext); if (c->NumStats == 1) @@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p) else { CPpmd_State *s = STATS(c); - if (s->Symbol != p->FoundState->Symbol) + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) { - do { s++; } while (s->Symbol != p->FoundState->Symbol); + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + if (s[0].Freq >= s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); s--; } } + if (s->Freq < MAX_FREQ - 9) { - s->Freq += 2; - c->SummFreq += 2; + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); } } } + if (p->OrderFall == 0) { - p->MinContext = p->MaxContext = CreateSuccessors(p, True); - if (p->MinContext == 0) + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. */ + p->MaxContext = p->MinContext = CreateSuccessors(p); + if (!p->MinContext) { RestartModel(p); return; @@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p) SetSuccessor(p->FoundState, REF(p->MinContext)); return; } + + + /* NON-MAX ORDER context */ - *p->Text++ = p->FoundState->Symbol; - successor = REF(p->Text); - if (p->Text >= p->UnitsStart) { - RestartModel(p); - return; + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RestartModel(p); + return; + } + maxSuccessor = REF(text); } - if (fSuccessor) + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) { - if (fSuccessor <= successor) + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) { - CTX_PTR cs = CreateSuccessors(p, False); - if (cs == NULL) + // minSuccessor is RAW-Successor. So we will create real contexts records: + CTX_PTR cs = CreateSuccessors(p); + if (!cs) { RestartModel(p); return; } - fSuccessor = REF(cs); + minSuccessor = REF(cs); } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + if (--p->OrderFall == 0) { - successor = fSuccessor; + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ p->Text -= (p->MaxContext != p->MinContext); } } else { - SetSuccessor(p->FoundState, successor); - fSuccessor = REF(p->MinContext); + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. + */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); } - - s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1); - - for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c)) + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do { unsigned ns1; - UInt32 cf, sf; + UInt32 sum; + if ((ns1 = c->NumStats) != 1) { if ((ns1 & 1) == 0) @@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p) oldPtr = STATS(c); MyMem12Cpy(ptr, oldPtr, oldNU); InsertNode(p, oldPtr, i); - c->Stats = STATS_REF(ptr); + c->Union4.Stats = STATS_REF(ptr); } } - c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1))); + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; } else { + // instead of One-symbol context we create 2-symbol context CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); if (!s) { RestartModel(p); return; } - *s = *ONE_STATE(c); - c->Stats = REF(s); - if (s->Freq < MAX_FREQ / 4 - 1) - s->Freq <<= 1; - else - s->Freq = MAX_FREQ - 4; - c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3)); - } - cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6); - sf = (UInt32)s0 + c->SummFreq; - if (cf < 6 * sf) - { - cf = 1 + (cf > sf) + (cf >= 4 * sf); - c->SummFreq += 3; - } - else - { - cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); - c->SummFreq = (UInt16)(c->SummFreq + cf); + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here + sum = freq + p->InitEsc + (ns > 3); + } } + { CPpmd_State *s = STATS(c) + ns1; - SetSuccessor(s, successor); + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; s->Symbol = p->FoundState->Symbol; - s->Freq = (Byte)cf; c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; } + c = SUFFIX(c); } - p->MaxContext = p->MinContext = CTX(fSuccessor); + while (c != mc); } + + +MY_NO_INLINE static void Rescale(CPpmd7 *p) { unsigned i, adder, sumFreq, escFreq; CPpmd_State *stats = STATS(p->MinContext); CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) { CPpmd_State tmp = *s; - for (; s != stats; s--) + do s[0] = s[-1]; + while (--s != stats); *s = tmp; } - escFreq = p->MinContext->SummFreq - s->Freq; - s->Freq += 4; - adder = (p->OrderFall != 0); - s->Freq = (Byte)((s->Freq + adder) >> 1); + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; - i = p->MinContext->NumStats - 1; do { - escFreq -= (++s)->Freq; - s->Freq = (Byte)((s->Freq + adder) >> 1); - sumFreq += s->Freq; - if (s[0].Freq > s[-1].Freq) + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { + CPpmd_State tmp = *s; CPpmd_State *s1 = s; - CPpmd_State tmp = *s1; do + { s1[0] = s1[-1]; - while (--s1 != stats && tmp.Freq > s1[-1].Freq); + } + while (--s1 != stats && freq > s1[-1].Freq); *s1 = tmp; } } @@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p) if (s->Freq == 0) { - unsigned numStats = p->MinContext->NumStats; - unsigned n0, n1; - do { i++; } while ((--s)->Freq == 0); + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ escFreq += i; - p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i); - if (p->MinContext->NumStats == 1) + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) { - CPpmd_State tmp = *stats; + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + do { - tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1)); escFreq >>= 1; + freq = (freq + 1) >> 1; } while (escFreq > 1); - InsertNode(p, stats, U2I(((numStats + 1) >> 1))); - *(p->FoundState = ONE_STATE(p->MinContext)) = tmp; + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + InsertNode(p, stats, U2I(n0)); return; } - n0 = (numStats + 1) >> 1; - n1 = (p->MinContext->NumStats + 1) >> 1; + + n1 = (numStatsNew + 1) >> 1; if (n0 != n1) - p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MyMem12Cpy(ptr, (const void *)stats, n1); + InsertNode(p, stats, i0); + } + else + SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); } - p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); - p->FoundState = STATS(p->MinContext); } + CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) { CPpmd_See *see; - unsigned nonMasked = p->MinContext->NumStats - numMasked; - if (p->MinContext->NumStats != 256) + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) { - see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + - (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) + - 2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) + - 4 * (unsigned)(numMasked > nonMasked) + + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + p->HiBitsFlag; { - unsigned r = (see->Summ >> see->Shift); - see->Summ = (UInt16)(see->Summ - r); + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); *escFreq = r + (r == 0); } } @@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) return see; } + static void NextContext(CPpmd7 *p) { CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); - if (p->OrderFall == 0 && (Byte *)c > p->Text) - p->MinContext = p->MaxContext = c; + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; else - UpdateModel(p); + Ppmd7_UpdateModel(p); } + void Ppmd7_Update1(CPpmd7 *p) { CPpmd_State *s = p->FoundState; - s->Freq += 4; - p->MinContext->SummFreq += 4; - if (s[0].Freq > s[-1].Freq) + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); p->FoundState = --s; - if (s->Freq > MAX_FREQ) + if (freq > MAX_FREQ) Rescale(p); } NextContext(p); } + void Ppmd7_Update1_0(CPpmd7 *p) { - p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq); - p->RunLength += p->PrevSuccess; - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) Rescale(p); NextContext(p); } + +/* void Ppmd7_UpdateBin(CPpmd7 *p) { - p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1: 0)); + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); p->PrevSuccess = 1; p->RunLength++; NextContext(p); } +*/ void Ppmd7_Update2(CPpmd7 *p) { - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) - Rescale(p); + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; p->RunLength = p->InitRL; - UpdateModel(p); + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Rescale(p); + Ppmd7_UpdateModel(p); +} + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array } + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls RestartModel(), when there is no free memory for allocation. +And RestartModel() changes the state to orignal start state, with full free block. + + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - AllocUnitsRare() + +AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - AllocUnitsRare() + +AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i]] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. + +For any successors at any time: the following condtions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions. +Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for +max-order context. + +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. +*/ diff --git a/deps/LZMA-SDK/C/Ppmd7.h b/deps/LZMA-SDK/C/Ppmd7.h index cce93f120..297e35fe9 100644 --- a/deps/LZMA-SDK/C/Ppmd7.h +++ b/deps/LZMA-SDK/C/Ppmd7.h @@ -1,10 +1,8 @@ -/* Ppmd7.h -- PPMdH compression codec -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -/* This code supports virtual RangeDecoder and includes the implementation -of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H. -If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #ifndef __PPMD7_H #define __PPMD7_H @@ -21,23 +19,56 @@ EXTERN_C_BEGIN struct CPpmd7_Context_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Context_ * - #else - UInt32 - #endif - CPpmd7_Context_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 typedef struct CPpmd7_Context_ { UInt16 NumStats; - UInt16 SummFreq; - CPpmd_State_Ref Stats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + CPpmd7_Context_Ref Suffix; } CPpmd7_Context; -#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq) +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteIn *Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOut *Stream; +} CPpmd7z_RangeEnc; + typedef struct { @@ -48,17 +79,30 @@ typedef struct UInt32 Size; UInt32 GlueCount; - Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; - Byte Indx2Units[PPMD_NUM_INDEXES]; + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment Byte Units2Indx[128]; CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; - Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; CPpmd_See DummySee, See[25][16]; UInt16 BinSumm[128][64]; + // int LastSymbol; } CPpmd7; + void Ppmd7_Construct(CPpmd7 *p); BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); @@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); /* ---------- Internal Functions ---------- */ -extern const Byte PPMD7_kExpEscape[16]; - -#ifdef PPMD_32BIT - #define Ppmd7_GetPtr(p, ptr) (ptr) - #define Ppmd7_GetContext(p, ptr) (ptr) - #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats) -#else - #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs))) - #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs))) - #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats))) -#endif +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) void Ppmd7_Update1(CPpmd7 *p); void Ppmd7_Update1_0(CPpmd7 *p); void Ppmd7_Update2(CPpmd7 *p); -void Ppmd7_UpdateBin(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4)) #define Ppmd7_GetBinSumm(p) \ - &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \ - p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \ - (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \ - 2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \ - ((p->RunLength >> 26) & 0x20)] + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + /* ---------- Decode ---------- */ -typedef struct IPpmd7_RangeDec IPpmd7_RangeDec; +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) -struct IPpmd7_RangeDec -{ - UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total); - void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size); - UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0); -}; +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() -typedef struct -{ - IPpmd7_RangeDec vt; - UInt32 Range; - UInt32 Code; - IByteIn *Stream; -} CPpmd7z_RangeDec; +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p); -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p); -#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +/* Ppmd7a_* : original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc); +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); /* ---------- Encode ---------- */ -typedef struct -{ - UInt64 Low; - UInt32 Range; - Byte Cache; - UInt64 CacheSize; - IByteOut *Stream; -} CPpmd7z_RangeEnc; - -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p); -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p); - -void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol); +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); EXTERN_C_END diff --git a/deps/LZMA-SDK/C/Ppmd7Dec.c b/deps/LZMA-SDK/C/Ppmd7Dec.c index 202640710..a18f0b873 100644 --- a/deps/LZMA-SDK/C/Ppmd7Dec.c +++ b/deps/LZMA-SDK/C/Ppmd7Dec.c @@ -1,6 +1,8 @@ -/* Ppmd7Dec.c -- PPMdH Decoder -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #include "Precomp.h" @@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #define kTopValue (1 << 24) -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) { unsigned i; p->Code = 0; p->Range = 0xFFFFFFFF; - if (IByteIn_Read(p->Stream) != 0) + if (READ_BYTE(p) != 0) return False; for (i = 0; i < 4; i++) - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); + p->Code = (p->Code << 8) | READ_BYTE(p); return (p->Code < 0xFFFFFFFF); } -#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt); - -static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total) -{ - GET_Ppmd7z_RangeDec - return p->Code / (p->Range /= total); -} +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; -static void Range_Normalize(CPpmd7z_RangeDec *p) -{ - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - } - } -} +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} -static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size) -{ - GET_Ppmd7z_RangeDec - p->Code -= start * p->Range; - p->Range *= size; - Range_Normalize(p); -} +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) -static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0) -{ - GET_Ppmd7z_RangeDec - UInt32 newBound = (p->Range >> 14) * size0; - UInt32 symbol; - if (p->Code < newBound) - { - symbol = 0; - p->Range = newBound; - } - else - { - symbol = 1; - p->Code -= newBound; - p->Range -= newBound; - } - Range_Normalize(p); - return symbol; -} +#define R (&p->rc.dec) -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p) +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) { - p->vt.GetThreshold = Range_GetThreshold; - p->vt.Decode = Range_Decode; - p->vt.DecodeBit = Range_DecodeBit; + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) } +#define RC_Decode(start, size) RangeDec_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + -#define MASK(sym) ((signed char *)charMask)[sym] +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc) +#define MASK(sym) ((unsigned char *)charMask)[sym] +// MY_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) { size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 1) { CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); unsigned i; UInt32 count, hiCnt; - if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) { - Byte symbol; - rc->Decode(rc, 0, s->Freq); + Byte sym; + RC_DecodeFinal(0, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1_0(p); - return symbol; + return sym; } + p->PrevSuccess = 0; - i = p->MinContext->NumStats - 1; + i = (unsigned)p->MinContext->NumStats - 1; + do { - if ((hiCnt += (++s)->Freq) > count) + if ((Int32)(count -= (++s)->Freq) < 0) { - Byte symbol; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1(p); - return symbol; + return sym; } } while (--i); - if (count >= p->MinContext->SummFreq) - return -2; - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; - rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt); + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - i = p->MinContext->NumStats - 1; - do { MASK((--s)->Symbol) = 0; } while (--i); + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } } else { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); UInt16 *prob = Ppmd7_GetBinSumm(p); - if (rc->DecodeBit(rc, *prob) == 0) + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) { - Byte symbol; - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; - Ppmd7_UpdateBin(p); - return symbol; + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; } - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + PPMD_SetAllBitsIn256Bytes(charMask); MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; p->PrevSuccess = 0; } + for (;;) { - CPpmd_State *ps[256], *s; + CPpmd_State *s, *s2; UInt32 freqSum, count, hiCnt; + CPpmd_See *see; - unsigned i, num, numMasked = p->MinContext->NumStats; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + do { p->OrderFall++; - if (!p->MinContext->Suffix) - return -1; - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); } - while (p->MinContext->NumStats == numMasked); - hiCnt = 0; - s = Ppmd7_GetStats(p, p->MinContext); - i = 0; - num = p->MinContext->NumStats - numMasked; - do + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + { - int k = (int)(MASK(s->Symbol)); - hiCnt += (s->Freq & k); - ps[i] = s++; - i -= k; + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); } - while (i != num); - + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); freqSum += hiCnt; - count = rc->GetThreshold(rc, freqSum); + + + + + count = RC_GetThreshold(freqSum); if (count < hiCnt) { - Byte symbol; - CPpmd_State **pps = ps; - for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++); - s = *pps; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + }; + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + + // new (see->Summ) value can overflow over 16-bits in some rare cases Ppmd_See_Update(see); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update2(p); - return symbol; + return sym; } + if (count >= freqSum) - return -2; - rc->Decode(rc, hiCnt, freqSum - hiCnt); + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt); + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases see->Summ = (UInt16)(see->Summ + freqSum); - do { MASK(ps[--i]->Symbol) = 0; } while (i != 0); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; } + while (++buf < lim); + p->LastSymbol = sym; + return buf; } +*/ diff --git a/deps/LZMA-SDK/C/Ppmd7Enc.c b/deps/LZMA-SDK/C/Ppmd7Enc.c index a74d3002b..6af1ec15e 100644 --- a/deps/LZMA-SDK/C/Ppmd7Enc.c +++ b/deps/LZMA-SDK/C/Ppmd7Enc.c @@ -1,6 +1,8 @@ -/* Ppmd7Enc.c -- PPMdH Encoder -2017-04-03 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ +/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #include "Precomp.h" @@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #define kTopValue (1 << 24) -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p) +#define R (&p->rc.enc) + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p) { - p->Low = 0; - p->Range = 0xFFFFFFFF; - p->Cache = 0; - p->CacheSize = 1; + R->Low = 0; + R->Range = 0xFFFFFFFF; + R->Cache = 0; + R->CacheSize = 1; } -static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p) +MY_NO_INLINE +static void RangeEnc_ShiftLow(CPpmd7 *p) { - if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0) + if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0) { - Byte temp = p->Cache; + Byte temp = R->Cache; do { - IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32))); + IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32))); temp = 0xFF; } - while (--p->CacheSize != 0); - p->Cache = (Byte)((UInt32)p->Low >> 24); + while (--R->CacheSize != 0); + R->Cache = (Byte)((UInt32)R->Low >> 24); } - p->CacheSize++; - p->Low = (UInt32)p->Low << 8; + R->CacheSize++; + R->Low = (UInt32)((UInt32)R->Low << 8); } -static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total) -{ - p->Low += start * (p->Range /= total); - p->Range *= size; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } -} +#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p); +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} -static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0) -{ - p->Range = (p->Range >> 14) * size0; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } -} +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +/* +#define RangeEnc_Encode(p, start, _size_) \ + { UInt32 size = _size_; \ + R->Low += start * R->Range; \ + R->Range *= size; \ + RC_NORM_LOCAL(p); } +*/ -static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0) +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) { - UInt32 newBound = (p->Range >> 14) * size0; - p->Low += newBound; - p->Range -= newBound; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } + R->Low += start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(p); } -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p) +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) { unsigned i; for (i = 0; i < 5; i++) @@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p) } -#define MASK(sym) ((signed char *)charMask)[sym] -void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol) +#define RC_Encode(start, size) RangeEnc_Encode(p, start, size); +#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p); + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define SUFFIX(ctx) CTX((ctx)->Suffix) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) + +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + +MY_FORCE_INLINE +static +void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) { size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 1) { CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); UInt32 sum; unsigned i; + + + + + R->Range /= p->MinContext->Union2.SummFreq; + if (s->Symbol == symbol) { - RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq); + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(0, s->Freq); p->FoundState = s; Ppmd7_Update1_0(p); return; } p->PrevSuccess = 0; sum = s->Freq; - i = p->MinContext->NumStats - 1; + i = (unsigned)p->MinContext->NumStats - 1; do { if ((++s)->Symbol == symbol) { - RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq); + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(sum, s->Freq); p->FoundState = s; Ppmd7_Update1(p); return; @@ -106,82 +125,199 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol) sum += s->Freq; } while (--i); + + // R->Range /= p->MinContext->Union2.SummFreq; + RC_Encode(sum, p->MinContext->Union2.SummFreq - sum); - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - i = p->MinContext->NumStats - 1; - do { MASK((--s)->Symbol) = 0; } while (--i); - RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq); + // MASK(s->Symbol) = 0; + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } } else { UInt16 *prob = Ppmd7_GetBinSumm(p); CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt32 pr = *prob; + UInt32 bound = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); if (s->Symbol == symbol) { - RangeEnc_EncodeBit_0(rc, *prob); - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - p->FoundState = s; - Ppmd7_UpdateBin(p); + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + // RangeEnc_EncodeBit_0(p, bound); + R->Range = bound; + RC_NORM_1(p); + + // p->FoundState = s; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } return; } - else - { - RangeEnc_EncodeBit_1(rc, *prob); - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; - PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - p->PrevSuccess = 0; - } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + // RangeEnc_EncodeBit_1(p, bound); + R->Low += bound; + R->Range -= bound; + RC_NORM_LOCAL(p) + + PPMD_SetAllBitsIn256Bytes(charMask); + MASK(s->Symbol) = 0; + p->PrevSuccess = 0; } + for (;;) { - UInt32 escFreq; CPpmd_See *see; CPpmd_State *s; - UInt32 sum; - unsigned i, numMasked = p->MinContext->NumStats; + UInt32 sum, escFreq; + CPpmd7_Context *mc; + unsigned i, numMasked; + + RC_NORM_REMOTE(p) + + mc = p->MinContext; + numMasked = mc->NumStats; + do { p->OrderFall++; - if (!p->MinContext->Suffix) + if (!mc->Suffix) return; /* EndMarker (symbol = -1) */ - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); + mc = Ppmd7_GetContext(p, mc->Suffix); + i = mc->NumStats; } - while (p->MinContext->NumStats == numMasked); + while (i == numMasked); + + p->MinContext = mc; - see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq); - s = Ppmd7_GetStats(p, p->MinContext); + // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq); + { + if (i != 256) + { + unsigned nonMasked = i - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + p->HiBitsFlag + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i) + + 4 * (unsigned)(numMasked > nonMasked); + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + escFreq = 1; + } + } + + s = Ppmd7_GetStats(p, mc); sum = 0; - i = p->MinContext->NumStats; + // i = mc->NumStats; + do { - int cur = s->Symbol; - if (cur == symbol) + unsigned cur = s->Symbol; + if ((int)cur == symbol) { UInt32 low = sum; - CPpmd_State *s1 = s; - do + UInt32 freq = s->Freq; + unsigned num2; + + Ppmd_See_Update(see); + p->FoundState = s; + sum += escFreq; + + num2 = i / 2; + i &= 1; + sum += freq & (0 - (UInt32)i); + if (num2 != 0) { - sum += (s->Freq & (int)(MASK(s->Symbol))); - s++; + s += i; + for (;;) + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + sum += (s[-2].Freq & (unsigned)(MASK(sym0))); + sum += (s[-1].Freq & (unsigned)(MASK(sym1))); + if (--num2 == 0) + break; + } } - while (--i); - RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq); - Ppmd_See_Update(see); - p->FoundState = s1; + + + R->Range /= sum; + RC_EncodeFinal(low, freq); Ppmd7_Update2(p); return; } - sum += (s->Freq & (int)(MASK(cur))); - MASK(cur) = 0; + sum += (s->Freq & (unsigned)(MASK(cur))); s++; } while (--i); - RangeEnc_Encode(rc, sum, escFreq, sum + escFreq); - see->Summ = (UInt16)(see->Summ + sum + escFreq); + { + UInt32 total = sum + escFreq; + see->Summ = (UInt16)(see->Summ + total); + + R->Range /= total; + RC_Encode(sum, escFreq); + } + + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + s--; + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } +} + + +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim) +{ + for (; buf < lim; buf++) + { + Ppmd7z_EncodeSymbol(p, *buf); } } diff --git a/deps/LZMA-SDK/C/Sha256.c b/deps/LZMA-SDK/C/Sha256.c index 90994e5ab..c03b75afe 100644 --- a/deps/LZMA-SDK/C/Sha256.c +++ b/deps/LZMA-SDK/C/Sha256.c @@ -1,5 +1,5 @@ -/* Crypto/Sha256.c -- SHA-256 Hash -2017-04-03 : Igor Pavlov : Public domain +/* Sha256.c -- SHA-256 Hash +2021-04-01 : Igor Pavlov : Public domain This code is based on public domain code from Wei Dai's Crypto++ library. */ #include "Precomp.h" @@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */ #include "RotateDefs.h" #include "Sha256.h" +#if defined(_MSC_VER) && (_MSC_VER < 1900) +// #define USE_MY_MM +#endif + +#ifdef MY_CPU_X86_OR_AMD64 + #ifdef _MSC_VER + #if _MSC_VER >= 1200 + #define _SHA_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1800) // fix that check + #define _SHA_SUPPORTED + #endif + #endif +#elif defined(MY_CPU_ARM_OR_ARM64) + #ifdef _MSC_VER + #if _MSC_VER >= 1910 + #define _SHA_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define _SHA_SUPPORTED + #endif + #endif +#endif + +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +#ifdef _SHA_SUPPORTED + void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); + + static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; + static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW; + + #define UPDATE_BLOCKS(p) p->func_UpdateBlocks +#else + #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks +#endif + + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo) +{ + SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; + + #ifdef _SHA_SUPPORTED + if (algo != SHA256_ALGO_SW) + { + if (algo == SHA256_ALGO_DEFAULT) + func = g_FUNC_UPDATE_BLOCKS; + else + { + if (algo != SHA256_ALGO_HW) + return False; + func = g_FUNC_UPDATE_BLOCKS_HW; + if (!func) + return False; + } + } + #else + if (algo > 1) + return False; + #endif + + p->func_UpdateBlocks = func; + return True; +} + + /* define it for speed optimization */ -#ifndef _SFX -#define _SHA256_UNROLL -#define _SHA256_UNROLL2 + +#ifdef _SFX + #define STEP_PRE 1 + #define STEP_MAIN 1 +#else + #define STEP_PRE 2 + #define STEP_MAIN 4 + // #define _SHA256_UNROLL #endif -/* #define _SHA256_UNROLL2 */ +#if STEP_MAIN != 16 + #define _SHA256_BIG_W +#endif -void Sha256_Init(CSha256 *p) + + + +void Sha256_InitState(CSha256 *p) { + p->count = 0; p->state[0] = 0x6a09e667; p->state[1] = 0xbb67ae85; p->state[2] = 0x3c6ef372; @@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p) p->state[5] = 0x9b05688c; p->state[6] = 0x1f83d9ab; p->state[7] = 0x5be0cd19; - p->count = 0; +} + +void Sha256_Init(CSha256 *p) +{ + p->func_UpdateBlocks = + #ifdef _SHA_SUPPORTED + g_FUNC_UPDATE_BLOCKS; + #else + NULL; + #endif + Sha256_InitState(p); } #define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22)) @@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p) #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3)) #define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10)) -#define blk0(i) (W[i]) -#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15])) - #define Ch(x,y,z) (z^(x&(y^z))) #define Maj(x,y,z) ((x&y)|(z&(x|y))) -#ifdef _SHA256_UNROLL2 - -#define R(a,b,c,d,e,f,g,h, i) \ - h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \ - d += h; \ - h += S0(a) + Maj(a, b, c) -#define RX_8(i) \ - R(a,b,c,d,e,f,g,h, i); \ - R(h,a,b,c,d,e,f,g, i+1); \ - R(g,h,a,b,c,d,e,f, i+2); \ - R(f,g,h,a,b,c,d,e, i+3); \ - R(e,f,g,h,a,b,c,d, i+4); \ - R(d,e,f,g,h,a,b,c, i+5); \ - R(c,d,e,f,g,h,a,b, i+6); \ - R(b,c,d,e,f,g,h,a, i+7) +#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4)) -#define RX_16 RX_8(0); RX_8(8); +#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)) +#ifdef _SHA256_BIG_W + // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned. + #define w(j, i) W[(size_t)(j) + i] + #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i)) #else + #if STEP_MAIN == 16 + #define w(j, i) W[(i) & 15] + #else + #define w(j, i) W[((size_t)(j) + (i)) & 15] + #endif + #define blk2(j, i) (w(j, i) += blk2_main(j, i)) +#endif -#define a(i) T[(0-(i))&7] -#define b(i) T[(1-(i))&7] -#define c(i) T[(2-(i))&7] -#define d(i) T[(3-(i))&7] -#define e(i) T[(4-(i))&7] -#define f(i) T[(5-(i))&7] -#define g(i) T[(6-(i))&7] -#define h(i) T[(7-(i))&7] - -#define R(i) \ - h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \ - d(i) += h(i); \ - h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \ +#define W_MAIN(i) blk2(j, i) -#ifdef _SHA256_UNROLL -#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7); -#define RX_16 RX_8(0); RX_8(8); +#define T1(wx, i) \ + tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + h = g; \ + g = f; \ + f = e; \ + e = d + tmp; \ + tmp += S0(a) + Maj(a, b, c); \ + d = c; \ + c = b; \ + b = a; \ + a = tmp; \ -#else +#define R1_PRE(i) T1( W_PRE, i) +#define R1_MAIN(i) T1( W_MAIN, i) -#define RX_16 unsigned i; for (i = 0; i < 16; i++) { R(i); } +#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) +#define R2_MAIN(i) \ + R1_MAIN(i) \ + R1_MAIN(i + 1) \ #endif + + +#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 + +#define T4( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + tmp = h; \ + h += d; \ + d = tmp + S0(a) + Maj(a, b, c); \ + +#define R4( wx, i) \ + T4 ( a,b,c,d,e,f,g,h, wx, (i )); \ + T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \ + T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \ + T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \ + +#define R4_PRE(i) R4( W_PRE, i) +#define R4_MAIN(i) R4( W_MAIN, i) + + +#define T8( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + +#define R8( wx, i) \ + T8 ( a,b,c,d,e,f,g,h, wx, i ); \ + T8 ( h,a,b,c,d,e,f,g, wx, i+1); \ + T8 ( g,h,a,b,c,d,e,f, wx, i+2); \ + T8 ( f,g,h,a,b,c,d,e, wx, i+3); \ + T8 ( e,f,g,h,a,b,c,d, wx, i+4); \ + T8 ( d,e,f,g,h,a,b,c, wx, i+5); \ + T8 ( c,d,e,f,g,h,a,b, wx, i+6); \ + T8 ( b,c,d,e,f,g,h,a, wx, i+7); \ + +#define R8_PRE(i) R8( W_PRE, i) +#define R8_MAIN(i) R8( W_MAIN, i) + #endif -static const UInt32 K[64] = { +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); + +// static +extern MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -109,30 +249,27 @@ static const UInt32 K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; -static void Sha256_WriteByteBlock(CSha256 *p) -{ - UInt32 W[16]; - unsigned j; - UInt32 *state; +#define K SHA256_K_ARRAY - #ifdef _SHA256_UNROLL2 - UInt32 a,b,c,d,e,f,g,h; + +MY_NO_INLINE +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + UInt32 W + #ifdef _SHA256_BIG_W + [64]; #else - UInt32 T[8]; + [16]; #endif - for (j = 0; j < 16; j += 4) - { - const Byte *ccc = p->buffer + j * 4; - W[j ] = GetBe32(ccc); - W[j + 1] = GetBe32(ccc + 4); - W[j + 2] = GetBe32(ccc + 8); - W[j + 3] = GetBe32(ccc + 12); - } + unsigned j; - state = p->state; + UInt32 a,b,c,d,e,f,g,h; - #ifdef _SHA256_UNROLL2 + #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) + UInt32 tmp; + #endif + a = state[0]; b = state[1]; c = state[2]; @@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p) f = state[5]; g = state[6]; h = state[7]; - #else - for (j = 0; j < 8; j++) - T[j] = state[j]; - #endif - for (j = 0; j < 64; j += 16) + while (numBlocks) { - RX_16 + + for (j = 0; j < 16; j += STEP_PRE) + { + #if STEP_PRE > 4 + + #if STEP_PRE < 8 + R4_PRE(0); + #else + R8_PRE(0); + #if STEP_PRE == 16 + R8_PRE(8); + #endif + #endif + + #else + + R1_PRE(0); + #if STEP_PRE >= 2 + R1_PRE(1); + #if STEP_PRE >= 4 + R1_PRE(2); + R1_PRE(3); + #endif + #endif + + #endif + } + + for (j = 16; j < 64; j += STEP_MAIN) + { + #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 + + #if STEP_MAIN < 8 + R4_MAIN(0); + #else + R8_MAIN(0); + #if STEP_MAIN == 16 + R8_MAIN(8); + #endif + #endif + + #else + + R1_MAIN(0); + #if STEP_MAIN >= 2 + R1_MAIN(1); + #if STEP_MAIN >= 4 + R2_MAIN(2); + #if STEP_MAIN >= 8 + R2_MAIN(4); + R2_MAIN(6); + #if STEP_MAIN >= 16 + R2_MAIN(8); + R2_MAIN(10); + R2_MAIN(12); + R2_MAIN(14); + #endif + #endif + #endif + #endif + #endif + } + + a += state[0]; state[0] = a; + b += state[1]; state[1] = b; + c += state[2]; state[2] = c; + d += state[3]; state[3] = d; + e += state[4]; state[4] = e; + f += state[5]; state[5] = f; + g += state[6]; state[6] = g; + h += state[7]; state[7] = h; + + data += 64; + numBlocks--; } - #ifdef _SHA256_UNROLL2 - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - state[4] += e; - state[5] += f; - state[6] += g; - state[7] += h; - #else - for (j = 0; j < 8; j++) - state[j] += T[j]; - #endif - /* Wipe variables */ /* memset(W, 0, sizeof(W)); */ - /* memset(T, 0, sizeof(T)); */ } #undef S0 #undef S1 #undef s0 #undef s1 +#undef K + +#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1) void Sha256_Update(CSha256 *p, const Byte *data, size_t size) { @@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size) return; } - size -= num; - memcpy(p->buffer + pos, data, num); - data += num; + if (pos != 0) + { + size -= num; + memcpy(p->buffer + pos, data, num); + data += num; + Sha256_UpdateBlock(p); + } } - - for (;;) { - Sha256_WriteByteBlock(p); - if (size < 64) - break; - size -= 64; - memcpy(p->buffer, data, 64); - data += 64; - } - - if (size != 0) + size_t numBlocks = size >> 6; + UPDATE_BLOCKS(p)(p->state, data, numBlocks); + size &= 0x3F; + if (size == 0) + return; + data += (numBlocks << 6); memcpy(p->buffer, data, size); + } } + void Sha256_Final(CSha256 *p, Byte *digest) { unsigned pos = (unsigned)p->count & 0x3F; @@ -219,13 +414,30 @@ void Sha256_Final(CSha256 *p, Byte *digest) p->buffer[pos++] = 0x80; - while (pos != (64 - 8)) + if (pos > (64 - 8)) + { + while (pos != 64) { p->buffer[pos++] = 0; } + // memset(&p->buf.buffer[pos], 0, 64 - pos); + Sha256_UpdateBlock(p); + pos = 0; + } + + /* + if (pos & 3) { - pos &= 0x3F; - if (pos == 0) - Sha256_WriteByteBlock(p); - p->buffer[pos++] = 0; + p->buffer[pos] = 0; + p->buffer[pos + 1] = 0; + p->buffer[pos + 2] = 0; + pos += 3; + pos &= ~3; } + { + for (; pos < 64 - 8; pos += 4) + *(UInt32 *)(&p->buffer[pos]) = 0; + } + */ + + memset(&p->buffer[pos], 0, (64 - 8) - pos); { UInt64 numBits = (p->count << 3); @@ -233,16 +445,42 @@ void Sha256_Final(CSha256 *p, Byte *digest) SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)); } - Sha256_WriteByteBlock(p); + Sha256_UpdateBlock(p); for (i = 0; i < 8; i += 2) { UInt32 v0 = p->state[i]; - UInt32 v1 = p->state[i + 1]; + UInt32 v1 = p->state[(size_t)i + 1]; SetBe32(digest , v0); SetBe32(digest + 4, v1); digest += 8; } - Sha256_Init(p); + Sha256_InitState(p); +} + + +void Sha256Prepare() +{ + #ifdef _SHA_SUPPORTED + SHA256_FUNC_UPDATE_BLOCKS f, f_hw; + f = Sha256_UpdateBlocks; + f_hw = NULL; + #ifdef MY_CPU_X86_OR_AMD64 + #ifndef USE_MY_MM + if (CPU_IsSupported_SHA() + && CPU_IsSupported_SSSE3() + // && CPU_IsSupported_SSE41() + ) + #endif + #else + if (CPU_IsSupported_SHA2()) + #endif + { + // printf("\n========== HW SHA256 ======== \n"); + f = f_hw = Sha256_UpdateBlocks_HW; + } + g_FUNC_UPDATE_BLOCKS = f; + g_FUNC_UPDATE_BLOCKS_HW = f_hw; + #endif } diff --git a/deps/LZMA-SDK/C/Sha256.h b/deps/LZMA-SDK/C/Sha256.h index 7f17ccf9c..f52933986 100644 --- a/deps/LZMA-SDK/C/Sha256.h +++ b/deps/LZMA-SDK/C/Sha256.h @@ -1,26 +1,76 @@ /* Sha256.h -- SHA-256 Hash -2013-01-18 : Igor Pavlov : Public domain */ +2021-01-01 : Igor Pavlov : Public domain */ -#ifndef __CRYPTO_SHA256_H -#define __CRYPTO_SHA256_H +#ifndef __7Z_SHA256_H +#define __7Z_SHA256_H #include "7zTypes.h" EXTERN_C_BEGIN -#define SHA256_DIGEST_SIZE 32 +#define SHA256_NUM_BLOCK_WORDS 16 +#define SHA256_NUM_DIGEST_WORDS 8 + +#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4) +#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4) + +typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* + if (the system supports different SHA256 code implementations) + { + (CSha256::func_UpdateBlocks) will be used + (CSha256::func_UpdateBlocks) can be set by + Sha256_Init() - to default (fastest) + Sha256_SetFunction() - to any algo + } + else + { + (CSha256::func_UpdateBlocks) is ignored. + } +*/ typedef struct { - UInt32 state[8]; + SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; UInt64 count; - Byte buffer[64]; + UInt64 __pad_2[2]; + UInt32 state[SHA256_NUM_DIGEST_WORDS]; + + Byte buffer[SHA256_BLOCK_SIZE]; } CSha256; + +#define SHA256_ALGO_DEFAULT 0 +#define SHA256_ALGO_SW 1 +#define SHA256_ALGO_HW 2 + +/* +Sha256_SetFunction() +return: + 0 - (algo) value is not supported, and func_UpdateBlocks was not changed + 1 - func_UpdateBlocks was set according (algo) value. +*/ + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo); + +void Sha256_InitState(CSha256 *p); void Sha256_Init(CSha256 *p); void Sha256_Update(CSha256 *p, const Byte *data, size_t size); void Sha256_Final(CSha256 *p, Byte *digest); + + + +// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* +call Sha256Prepare() once at program start. +It prepares all supported implementations, and detects the fastest implementation. +*/ + +void Sha256Prepare(void); + EXTERN_C_END #endif diff --git a/deps/LZMA-SDK/C/Sha256Opt.c b/deps/LZMA-SDK/C/Sha256Opt.c new file mode 100644 index 000000000..cc8c53e1b --- /dev/null +++ b/deps/LZMA-SDK/C/Sha256Opt.c @@ -0,0 +1,373 @@ +/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions +2021-04-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#if defined(_MSC_VER) +#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) +// #define USE_MY_MM +#endif +#endif + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_SHA + #ifndef __SHA__ + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + #if defined(_MSC_VER) + // SSSE3: for clang-cl: + #include + #define __SHA__ + #endif + #endif + + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 8) // fix that check + #define USE_HW_SHA + #ifndef __SHA__ + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + // #pragma GCC target("sha,ssse3") + #endif + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1800) // fix that check + #define USE_HW_SHA + #endif + #elif defined(_MSC_VER) + #ifdef USE_MY_MM + #define USE_VER_MIN 1300 + #else + #define USE_VER_MIN 1910 + #endif + #if _MSC_VER >= USE_VER_MIN + #define USE_HW_SHA + #endif + #endif +// #endif // MY_CPU_X86_OR_AMD64 + +#ifdef USE_HW_SHA + +// #pragma message("Sha256 HW") +// #include + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +#include +#else +#include + +#if defined(_MSC_VER) && (_MSC_VER >= 1600) +// #include +#endif + +#ifdef USE_MY_MM +#include "My_mm.h" +#endif + +#endif + +/* +SHA256 uses: +SSE2: + _mm_loadu_si128 + _mm_storeu_si128 + _mm_set_epi32 + _mm_add_epi32 + _mm_shuffle_epi32 / pshufd + + + +SSSE3: + _mm_shuffle_epi8 / pshufb + _mm_alignr_epi8 +SHA: + _mm_sha256* +*/ + +// K array must be aligned for 16-bytes at least. +// The compiler can look align attribute and selects +// movdqu - for code without align attribute +// movdqa - for code with align attribute +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +#define K SHA256_K_ARRAY + + +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); +#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); + + +#define LOAD_SHUFFLE(m, k) \ + m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ + m = _mm_shuffle_epi8(m, mask); \ + +#define SM1(g0, g1, g2, g3) \ + SHA256_MSG1(g3, g0); \ + +#define SM2(g0, g1, g2, g3) \ + tmp = _mm_alignr_epi8(g1, g0, 4); \ + ADD_EPI32(g2, tmp); \ + SHA25G_MSG2(g2, g1); \ + +// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) +// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1) + + +#define NNN(g0, g1, g2, g3) + + +#define RND2(t0, t1) \ + t0 = _mm_sha256rnds2_epu32(t0, t1, msg); + +#define RND2_0(m, k) \ + msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \ + RND2(state0, state1); \ + msg = _mm_shuffle_epi32(msg, 0x0E); \ + + +#define RND2_1 \ + RND2(state1, state0); \ + + +// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 + +#define R4(k, g0, g1, g2, g3, OP0, OP1) \ + RND2_0(g0, k); \ + OP0(g0, g1, g2, g3); \ + RND2_1; \ + OP1(g0, g1, g2, g3); \ + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + +#define PREPARE_STATE \ + tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ + state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \ + state1 = state0; \ + state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \ + state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ + + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); + __m128i tmp; + __m128i state0, state1; + + if (numBlocks == 0) + return; + + state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); + state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]); + + PREPARE_STATE + + do + { + __m128i state0_save, state1_save; + __m128i m0, m1, m2, m3; + __m128i msg; + // #define msg tmp + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + + + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + + ADD_EPI32(state0, state0_save); + ADD_EPI32(state1, state1_save); + + data += 64; + } + while (--numBlocks); + + PREPARE_STATE + + _mm_storeu_si128((__m128i *) (void *) &state[0], state0); + _mm_storeu_si128((__m128i *) (void *) &state[4], state1); +} + +#endif // USE_HW_SHA + +#elif defined(MY_CPU_ARM_OR_ARM64) + + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_SHA + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_SHA + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_SHA + #endif + #endif + +#ifdef USE_HW_SHA + +// #pragma message("=== Sha256 HW === ") + +#if defined(__clang__) || defined(__GNUC__) + #ifdef MY_CPU_ARM64 + #define ATTRIB_SHA __attribute__((__target__("+crypto"))) + #else + #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#if defined(_MSC_VER) && defined(MY_CPU_ARM64) +#include +#else +#include +#endif + +typedef uint32x4_t v128; +// typedef __n128 v128; // MSVC + +#ifdef MY_CPU_BE + #define MY_rev32_for_LE(x) +#else + #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x))) +#endif + +#define LOAD_128(_p) (*(const v128 *)(const void *)(_p)) +#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v) + +#define LOAD_SHUFFLE(m, k) \ + m = LOAD_128((data + (k) * 16)); \ + MY_rev32_for_LE(m); \ + +// K array must be aligned for 16-bytes at least. +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +#define K SHA256_K_ARRAY + + +#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src); +#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3); + +#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0) +#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1) +#define NNN(g0, g1, g2, g3) + + +#define R4(k, g0, g1, g2, g3, OP0, OP1) \ + msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \ + tmp = state0; \ + state0 = vsha256hq_u32( state0, state1, msg ); \ + state1 = vsha256h2q_u32( state1, tmp, msg ); \ + OP0(g0, g1, g2, g3); \ + OP1(g0, g1, g2, g3); \ + + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + v128 state0, state1; + + if (numBlocks == 0) + return; + + state0 = LOAD_128(&state[0]); + state1 = LOAD_128(&state[4]); + + do + { + v128 state0_save, state1_save; + v128 m0, m1, m2, m3; + v128 msg, tmp; + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + + state0 = vaddq_u32(state0, state0_save); + state1 = vaddq_u32(state1, state1_save); + + data += 64; + } + while (--numBlocks); + + STORE_128(&state[0], state0); + STORE_128(&state[4], state1); +} + +#endif // USE_HW_SHA + +#endif // MY_CPU_ARM_OR_ARM64 + + +#ifndef USE_HW_SHA + +// #error Stop_Compiling_UNSUPPORTED_SHA +// #include + +// #include "Sha256.h" +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +#pragma message("Sha256 HW-SW stub was used") + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + Sha256_UpdateBlocks(state, data, numBlocks); + /* + UNUSED_VAR(state); + UNUSED_VAR(data); + UNUSED_VAR(numBlocks); + exit(1); + return; + */ +} + +#endif diff --git a/deps/LZMA-SDK/C/Threads.c b/deps/LZMA-SDK/C/Threads.c index 8fd86f224..402abab01 100644 --- a/deps/LZMA-SDK/C/Threads.c +++ b/deps/LZMA-SDK/C/Threads.c @@ -1,8 +1,10 @@ /* Threads.c -- multithreading library -2017-06-26 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #include "Precomp.h" +#ifdef _WIN32 + #ifndef UNDER_CE #include #endif @@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p) return 0; } -WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); } +WRes Handle_WaitObject(HANDLE h) +{ + DWORD dw = WaitForSingleObject(h, INFINITE); + /* + (dw) result: + WAIT_OBJECT_0 // 0 + WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space + WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space + WAIT_FAILED // 0xFFFFFFFF + */ + if (dw == WAIT_FAILED) + { + dw = GetLastError(); + if (dw == 0) + return WAIT_FAILED; + } + return (WRes)dw; +} + +#define Thread_Wait(p) Handle_WaitObject(*(p)) + +WRes Thread_Wait_Close(CThread *p) +{ + WRes res = Thread_Wait(p); + WRes res2 = Thread_Close(p); + return (res != 0 ? res : res2); +} WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - + #ifdef UNDER_CE - + DWORD threadId; *p = CreateThread(0, 0, func, param, 0, &threadId); - + #else - + unsigned threadId; - *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId); - + *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ return HandleToWRes(*p); } + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + #ifdef UNDER_CE + + UNUSED_VAR(affinity) + return Thread_Create(p, func, param); + + #else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + { + // DWORD_PTR prevMask = + SetThreadAffinityMask(h, (DWORD_PTR)affinity); + /* + if (prevMask == 0) + { + // affinity change is non-critical error, so we can ignore it + // wres = GetError(); + } + */ + } + { + DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return wres; + + #endif +} + + static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) { *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL); @@ -68,6 +145,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) { + // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore() *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL); return HandleToWRes(*p); } @@ -93,3 +171,336 @@ WRes CriticalSection_Init(CCriticalSection *p) #endif return 0; } + + + + +#else // _WIN32 + +// ---------- POSIX ---------- + +#ifndef __APPLE__ +#ifndef _7ZIP_AFFINITY_DISABLE +// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +#define _GNU_SOURCE +#endif +#endif + +#include "Threads.h" + +#include +#include +#include +#ifdef _7ZIP_AFFINITY_SUPPORTED +// #include +#endif + + +// #include +// #define PRF(p) p +#define PRF(p) + +#define Print(s) PRF(printf("\n%s\n", s)) + +// #include + +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) +{ + // new thread in Posix probably inherits affinity from parrent thread + Print("Thread_Create_With_CpuSet"); + + pthread_attr_t attr; + int ret; + // int ret2; + + p->_created = 0; + + RINOK(pthread_attr_init(&attr)); + + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + if (!ret) + { + if (cpuSet) + { + #ifdef _7ZIP_AFFINITY_SUPPORTED + + /* + printf("\n affinity :"); + unsigned i; + for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) + { + Byte b = *((const Byte *)cpuSet + i); + char temp[32]; + #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) + temp[0] = GET_HEX_CHAR((b & 0xF)); + temp[1] = GET_HEX_CHAR((b >> 4)); + // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian + // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian + temp[2] = 0; + printf("%s", temp); + } + printf("\n"); + */ + + // ret2 = + pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + #endif + } + + ret = pthread_create(&p->_tid, &attr, func, param); + + if (!ret) + { + p->_created = 1; + /* + if (cpuSet) + { + // ret2 = + pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + } + */ + } + } + // ret2 = + pthread_attr_destroy(&attr); + // if (ret2 != 0) ret = ret2; + return ret; +} + + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + return Thread_Create_With_CpuSet(p, func, param, NULL); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + Print("Thread_Create_WithAffinity"); + CCpuSet cs; + unsigned i; + CpuSet_Zero(&cs); + for (i = 0; i < sizeof(affinity) * 8; i++) + { + if (affinity == 0) + break; + if (affinity & 1) + { + CpuSet_Set(&cs, i); + } + affinity >>= 1; + } + return Thread_Create_With_CpuSet(p, func, param, &cs); +} + + +WRes Thread_Close(CThread *p) +{ + // Print("Thread_Close"); + int ret; + if (!p->_created) + return 0; + + ret = pthread_detach(p->_tid); + p->_tid = 0; + p->_created = 0; + return ret; +} + + +WRes Thread_Wait_Close(CThread *p) +{ + // Print("Thread_Wait_Close"); + void *thread_return; + int ret; + if (!p->_created) + return EINVAL; + + ret = pthread_join(p->_tid, &thread_return); + // probably we can't use that (_tid) after pthread_join(), so we close thread here + p->_created = 0; + p->_tid = 0; + return ret; +} + + + +static WRes Event_Create(CEvent *p, int manualReset, int signaled) +{ + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_manual_reset = manualReset; + p->_state = (signaled ? True : False); + p->_created = 1; + return 0; +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) + { return Event_Create(p, True, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) + { return Event_Create(p, False, signaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + + +WRes Event_Set(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = True; + int res1 = pthread_cond_broadcast(&p->_cond); + int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); +} + +WRes Event_Reset(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = False; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Wait(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_state == False) + { + // ETIMEDOUT + // ret = + pthread_cond_wait(&p->_cond, &p->_mutex); + // if (ret != 0) break; + } + if (p->_manual_reset == False) + { + p->_state = False; + } + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Close(CEvent *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_count = initCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int ret; + + if (releaseCount < 1) + return EINVAL; + + RINOK(pthread_mutex_lock(&p->_mutex)); + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + ret = ERROR_TOO_MANY_POSTS; // EINVAL; + else + { + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + } + RINOK(pthread_mutex_unlock(&p->_mutex)); + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Semaphore_Close(CSemaphore *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + + +WRes CriticalSection_Init(CCriticalSection *p) +{ + // Print("CriticalSection_Init"); + if (!p) + return EINTR; + return pthread_mutex_init(&p->_mutex, NULL); +} + +void CriticalSection_Enter(CCriticalSection *p) +{ + // Print("CriticalSection_Enter"); + if (p) + { + // int ret = + pthread_mutex_lock(&p->_mutex); + } +} + +void CriticalSection_Leave(CCriticalSection *p) +{ + // Print("CriticalSection_Leave"); + if (p) + { + // int ret = + pthread_mutex_unlock(&p->_mutex); + } +} + +void CriticalSection_Delete(CCriticalSection *p) +{ + // Print("CriticalSection_Delete"); + if (p) + { + // int ret = + pthread_mutex_destroy(&p->_mutex); + } +} + +LONG InterlockedIncrement(LONG volatile *addend) +{ + // Print("InterlockedIncrement"); + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend + 1; + *addend = val; + return val; + #else + return __sync_add_and_fetch(addend, 1); + #endif +} + +#endif // _WIN32 diff --git a/deps/LZMA-SDK/C/Threads.h b/deps/LZMA-SDK/C/Threads.h index f913241ae..8d4a10df9 100644 --- a/deps/LZMA-SDK/C/Threads.h +++ b/deps/LZMA-SDK/C/Threads.h @@ -1,38 +1,106 @@ /* Threads.h -- multithreading library -2017-06-18 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H #ifdef _WIN32 -#include +#include +#else + +#if !defined(__APPLE__) && !defined(_AIX) +#ifndef _7ZIP_AFFINITY_DISABLE +#define _7ZIP_AFFINITY_SUPPORTED +// #define _GNU_SOURCE +#endif +#endif + +#include #endif #include "7zTypes.h" EXTERN_C_BEGIN +#ifdef _WIN32 + WRes HandlePtr_Close(HANDLE *h); WRes Handle_WaitObject(HANDLE h); typedef HANDLE CThread; -#define Thread_Construct(p) *(p) = NULL + +#define Thread_Construct(p) { *(p) = NULL; } #define Thread_WasCreated(p) (*(p) != NULL) #define Thread_Close(p) HandlePtr_Close(p) -#define Thread_Wait(p) Handle_WaitObject(*(p)) +// #define Thread_Wait(p) Handle_WaitObject(*(p)) typedef -#ifdef UNDER_CE - DWORD + #ifdef UNDER_CE + DWORD + #else + unsigned + #endif + THREAD_FUNC_RET_TYPE; + +typedef DWORD_PTR CAffinityMask; +typedef DWORD_PTR CCpuSet; + +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } + +#else // _WIN32 + +typedef struct _CThread +{ + pthread_t _tid; + int _created; +} CThread; + +#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) +WRes Thread_Close(CThread *p); +// #define Thread_Wait Thread_Wait_Close + +typedef void * THREAD_FUNC_RET_TYPE; + +typedef UInt64 CAffinityMask; + +#ifdef _7ZIP_AFFINITY_SUPPORTED + +typedef cpu_set_t CCpuSet; +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) + #else - unsigned + +typedef UInt64 CCpuSet; +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) + #endif - THREAD_FUNC_RET_TYPE; + + +#endif // _WIN32 + #define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); +WRes Thread_Wait_Close(CThread *p); + +#ifdef _WIN32 +#define Thread_Create_With_CpuSet(p, func, param, cs) \ + Thread_Create_With_Affinity(p, func, param, *cs) +#else +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); +#endif + + +#ifdef _WIN32 typedef HANDLE CEvent; typedef CEvent CAutoResetEvent; @@ -63,6 +131,67 @@ WRes CriticalSection_Init(CCriticalSection *p); #define CriticalSection_Enter(p) EnterCriticalSection(p) #define CriticalSection_Leave(p) LeaveCriticalSection(p) + +#else // _WIN32 + +typedef struct _CEvent +{ + int _created; + int _manual_reset; + int _state; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(p) (p)->_created = 0 +#define Event_IsCreated(p) ((p)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes Event_Wait(CEvent *p); +WRes Event_Close(CEvent *p); + + +typedef struct _CSemaphore +{ + int _created; + UInt32 _count; + UInt32 _maxCount; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 +#define Semaphore_IsCreated(p) ((p)->_created) + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef struct _CCriticalSection +{ + pthread_mutex_t _mutex; +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +void CriticalSection_Delete(CCriticalSection *cs); +void CriticalSection_Enter(CCriticalSection *cs); +void CriticalSection_Leave(CCriticalSection *cs); + +LONG InterlockedIncrement(LONG volatile *addend); + +#endif // _WIN32 + EXTERN_C_END #endif diff --git a/deps/LZMA-SDK/C/Util/7z/7zMain.c b/deps/LZMA-SDK/C/Util/7z/7zMain.c index 1c02b48ec..3ab63a3d9 100644 --- a/deps/LZMA-SDK/C/Util/7z/7zMain.c +++ b/deps/LZMA-SDK/C/Util/7z/7zMain.c @@ -1,5 +1,5 @@ /* 7zMain.c - Test application for 7z Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -20,6 +20,13 @@ #ifdef _WIN32 #include #else +#include +#include +#ifdef __GNUC__ +#include +#endif +#include +// #include #include #include #endif @@ -108,7 +115,7 @@ static Byte *Utf16_To_Utf8(Byte *dest, const UInt16 *src, const UInt16 *srcLim) if (val < 0x80) { - *dest++ = (char)val; + *dest++ = (Byte)val; continue; } @@ -162,21 +169,21 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s ) { unsigned len = 0; - for (len = 0; s[len] != 0; len++); + for (len = 0; s[len] != 0; len++) {} #ifndef _USE_UTF8 { - unsigned size = len * 3 + 100; + const unsigned size = len * 3 + 100; if (!Buf_EnsureSize(buf, size)) return SZ_ERROR_MEM; { buf->data[0] = 0; if (len != 0) { - char defaultChar = '_'; + const char defaultChar = '_'; BOOL defUsed; - unsigned numChars = 0; - numChars = WideCharToMultiByte(codePage, 0, (LPCWSTR)s, len, (char *)buf->data, size, &defaultChar, &defUsed); + const unsigned numChars = (unsigned)WideCharToMultiByte( + codePage, 0, (LPCWSTR)s, (int)len, (char *)buf->data, (int)size, &defaultChar, &defUsed); if (numChars == 0 || numChars >= size) return SZ_ERROR_FAIL; buf->data[numChars] = 0; @@ -192,8 +199,8 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s #ifdef _WIN32 #ifndef USE_WINDOWS_FILE static UINT g_FileCodePage = CP_ACP; + #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage #endif - #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage #else #define MY_FILE_CODE_PAGE_PARAM #endif @@ -300,17 +307,142 @@ static void UIntToStr_2(char *s, unsigned value) s[1] = (char)('0' + (value % 10)); } + #define PERIOD_4 (4 * 365 + 1) #define PERIOD_100 (PERIOD_4 * 25 - 1) #define PERIOD_400 (PERIOD_100 * 4 + 1) -static void ConvertFileTimeToString(const CNtfsFileTime *nt, char *s) + + +#ifndef _WIN32 + +// MS uses long for BOOL, but long is 32-bit in MS. So we use int. +// typedef long BOOL; +typedef int BOOL; + +typedef struct _FILETIME +{ + DWORD dwLowDateTime; + DWORD dwHighDateTime; +} FILETIME; + +static LONG TIME_GetBias() +{ + time_t utc = time(NULL); + struct tm *ptm = localtime(&utc); + int localdaylight = ptm->tm_isdst; /* daylight for local timezone */ + ptm = gmtime(&utc); + ptm->tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */ + LONG bias = (int)(mktime(ptm)-utc); + return bias; +} + +#define TICKS_PER_SEC 10000000 + +#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32)) + +#define SET_FILETIME(ft, v64) \ + (ft)->dwLowDateTime = (DWORD)v64; \ + (ft)->dwHighDateTime = (DWORD)(v64 >> 32); + +#define WINAPI +#define TRUE 1 + +static BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime) +{ + UInt64 v = GET_TIME_64(fileTime); + v = (UInt64)((Int64)v - (Int64)TIME_GetBias() * TICKS_PER_SEC); + SET_FILETIME(localFileTime, v); + return TRUE; +} + +static const UInt32 kNumTimeQuantumsInSecond = 10000000; +static const UInt32 kFileTimeStartYear = 1601; +static const UInt32 kUnixTimeStartYear = 1970; +static const UInt64 kUnixTimeOffset = + (UInt64)60 * 60 * 24 * (89 + 365 * (kUnixTimeStartYear - kFileTimeStartYear)); + +static Int64 Time_FileTimeToUnixTime64(const FILETIME *ft) +{ + UInt64 winTime = GET_TIME_64(ft); + return (Int64)(winTime / kNumTimeQuantumsInSecond) - (Int64)kUnixTimeOffset; +} + +#if defined(_AIX) + #define MY_ST_TIMESPEC st_timespec +#else + #define MY_ST_TIMESPEC timespec +#endif + +static void FILETIME_To_timespec(const FILETIME *ft, struct MY_ST_TIMESPEC *ts) +{ + if (ft) + { + const Int64 sec = Time_FileTimeToUnixTime64(ft); + // time_t is long + const time_t sec2 = (time_t)sec; + if (sec2 == sec) + { + ts->tv_sec = sec2; + UInt64 winTime = GET_TIME_64(ft); + ts->tv_nsec = (long)((winTime % 10000000) * 100);; + return; + } + } + // else + { + ts->tv_sec = 0; + // ts.tv_nsec = UTIME_NOW; // set to the current time + ts->tv_nsec = UTIME_OMIT; // keep old timesptamp + } +} + +static WRes Set_File_FILETIME(const UInt16 *name, const FILETIME *mTime) +{ + struct timespec times[2]; + + const int flags = 0; // follow link + // = AT_SYMLINK_NOFOLLOW; // don't follow link + + CBuf buf; + int res; + Buf_Init(&buf); + RINOK(Utf16_To_Char(&buf, name MY_FILE_CODE_PAGE_PARAM)); + FILETIME_To_timespec(NULL, ×[0]); + FILETIME_To_timespec(mTime, ×[1]); + res = utimensat(AT_FDCWD, (const char *)buf.data, times, flags); + Buf_Free(&buf, &g_Alloc); + if (res == 0) + return 0; + return errno; +} + +#endif + +static void NtfsFileTime_to_FILETIME(const CNtfsFileTime *t, FILETIME *ft) +{ + ft->dwLowDateTime = (DWORD)(t->Low); + ft->dwHighDateTime = (DWORD)(t->High); +} + +static void ConvertFileTimeToString(const CNtfsFileTime *nTime, char *s) { unsigned year, mon, hour, min, sec; Byte ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; unsigned t; UInt32 v; - UInt64 v64 = nt->Low | ((UInt64)nt->High << 32); + // UInt64 v64 = nt->Low | ((UInt64)nt->High << 32); + UInt64 v64; + { + FILETIME fileTime, locTime; + NtfsFileTime_to_FILETIME(nTime, &fileTime); + if (!FileTimeToLocalFileTime(&fileTime, &locTime)) + { + locTime.dwHighDateTime = + locTime.dwLowDateTime = 0; + } + v64 = locTime.dwLowDateTime | ((UInt64)locTime.dwHighDateTime << 32); + } v64 /= 10000000; sec = (unsigned)(v64 % 60); v64 /= 60; min = (unsigned)(v64 % 60); v64 /= 60; @@ -354,6 +486,43 @@ static void PrintError(char *s) PrintLF(); } +static void PrintError_WRes(const char *message, WRes wres) +{ + Print("\nERROR: "); + Print(message); + PrintLF(); + { + char s[32]; + UIntToStr(s, (unsigned)wres, 1); + Print("System error code: "); + Print(s); + } + // sprintf(buffer + strlen(buffer), "\nSystem error code: %d", (unsigned)wres); + #ifdef _WIN32 + { + char *s = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, wres, 0, (LPSTR) &s, 0, NULL) != 0 && s) + { + Print(" : "); + Print(s); + LocalFree(s); + } + } + #else + { + const char *s = strerror(wres); + if (s) + { + Print(" : "); + Print(s); + } + } + #endif + PrintLF(); +} + static void GetAttribString(UInt32 wa, BoolInt isDir, char *s) { #ifdef USE_WINDOWS_FILE @@ -413,17 +582,22 @@ int MY_CDECL main(int numargs, char *args[]) allocImp = g_Alloc; allocTempImp = g_Alloc; - #ifdef UNDER_CE - if (InFile_OpenW(&archiveStream.file, L"\test.7z")) - #else - if (InFile_Open(&archiveStream.file, args[2])) - #endif { - PrintError("can not open input file"); - return 1; + WRes wres = + #ifdef UNDER_CE + InFile_OpenW(&archiveStream.file, L"\test.7z"); // change it + #else + InFile_Open(&archiveStream.file, args[2]); + #endif + if (wres != 0) + { + PrintError_WRes("cannot open input file", wres); + return 1; + } } FileInStream_CreateVTable(&archiveStream); + archiveStream.wres = 0; LookToRead2_CreateVTable(&lookStream, False); lookStream.buf = NULL; @@ -483,7 +657,7 @@ int MY_CDECL main(int numargs, char *args[]) size_t outSizeProcessed = 0; // const CSzFileItem *f = db.Files + i; size_t len; - unsigned isDir = SzArEx_IsDir(&db, i); + const BoolInt isDir = SzArEx_IsDir(&db, i); if (listCommand == 0 && isDir && !fullPaths) continue; len = SzArEx_GetFileNameUtf16(&db, i, NULL); @@ -546,8 +720,8 @@ int MY_CDECL main(int numargs, char *args[]) } Print(testCommand ? - "Testing ": - "Extracting "); + "T ": + "- "); res = PrintString(temp); if (res != SZ_OK) break; @@ -591,27 +765,37 @@ int MY_CDECL main(int numargs, char *args[]) PrintLF(); continue; } - else if (OutFile_OpenUtf16(&outFile, destPath)) + else { - PrintError("can not open output file"); - res = SZ_ERROR_FAIL; - break; + WRes wres = OutFile_OpenUtf16(&outFile, destPath); + if (wres != 0) + { + PrintError_WRes("cannot open output file", wres); + res = SZ_ERROR_FAIL; + break; + } } processedSize = outSizeProcessed; - if (File_Write(&outFile, outBuffer + offset, &processedSize) != 0 || processedSize != outSizeProcessed) { - PrintError("can not write output file"); - res = SZ_ERROR_FAIL; - break; + WRes wres = File_Write(&outFile, outBuffer + offset, &processedSize); + if (wres != 0 || processedSize != outSizeProcessed) + { + PrintError_WRes("cannot write output file", wres); + res = SZ_ERROR_FAIL; + break; + } } - #ifdef USE_WINDOWS_FILE { - FILETIME mtime, ctime; + FILETIME mtime; FILETIME *mtimePtr = NULL; + + #ifdef USE_WINDOWS_FILE + FILETIME ctime; FILETIME *ctimePtr = NULL; + #endif if (SzBitWithVals_Check(&db.MTime, i)) { @@ -620,6 +804,8 @@ int MY_CDECL main(int numargs, char *args[]) mtime.dwHighDateTime = (DWORD)(t->High); mtimePtr = &mtime; } + + #ifdef USE_WINDOWS_FILE if (SzBitWithVals_Check(&db.CTime, i)) { const CNtfsFileTime *t = &db.CTime.Vals[i]; @@ -627,16 +813,29 @@ int MY_CDECL main(int numargs, char *args[]) ctime.dwHighDateTime = (DWORD)(t->High); ctimePtr = &ctime; } + if (mtimePtr || ctimePtr) SetFileTime(outFile.handle, ctimePtr, NULL, mtimePtr); - } - #endif + #endif - if (File_Close(&outFile)) - { - PrintError("can not close output file"); - res = SZ_ERROR_FAIL; - break; + { + WRes wres = File_Close(&outFile); + if (wres != 0) + { + PrintError_WRes("cannot close output file", wres); + res = SZ_ERROR_FAIL; + break; + } + } + + #ifndef USE_WINDOWS_FILE + #ifdef _WIN32 + mtimePtr = mtimePtr; + #else + if (mtimePtr) + Set_File_FILETIME(destPath, mtimePtr); + #endif + #endif } #ifdef USE_WINDOWS_FILE @@ -672,13 +871,15 @@ int MY_CDECL main(int numargs, char *args[]) if (res == SZ_ERROR_UNSUPPORTED) PrintError("decoder doesn't support this archive"); else if (res == SZ_ERROR_MEM) - PrintError("can not allocate memory"); + PrintError("cannot allocate memory"); else if (res == SZ_ERROR_CRC) PrintError("CRC error"); + else if (res == SZ_ERROR_READ /* || archiveStream.Res != 0 */) + PrintError_WRes("Read Error", archiveStream.wres); else { char s[32]; - UInt64ToStr(res, s, 0); + UInt64ToStr((unsigned)res, s, 0); PrintError(s); } diff --git a/deps/LZMA-SDK/C/Util/7z/makefile.gcc b/deps/LZMA-SDK/C/Util/7z/makefile.gcc index f707935aa..d6ef9b2a7 100644 --- a/deps/LZMA-SDK/C/Util/7z/makefile.gcc +++ b/deps/LZMA-SDK/C/Util/7z/makefile.gcc @@ -1,75 +1,34 @@ -PROG = 7zDec -CXX = gcc -LIB = -RM = rm -f -CFLAGS = -c -O2 -Wall - -OBJS = 7zMain.o 7zAlloc.o 7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o CpuArch.o Delta.o LzmaDec.o Lzma2Dec.o Bra.o Bra86.o BraIA64.o Bcj2.o Ppmd7.o Ppmd7Dec.o 7zFile.o 7zStream.o - -all: $(PROG) - -$(PROG): $(OBJS) - $(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) - -7zMain.o: 7zMain.c - $(CXX) $(CFLAGS) 7zMain.c - -7zAlloc.o: ../../7zAlloc.c - $(CXX) $(CFLAGS) ../../7zAlloc.c - -7zArcIn.o: ../../7zArcIn.c - $(CXX) $(CFLAGS) ../../7zArcIn.c - -7zBuf.o: ../../7zBuf.c - $(CXX) $(CFLAGS) ../../7zBuf.c - -7zBuf2.o: ../../7zBuf2.c - $(CXX) $(CFLAGS) ../../7zBuf2.c - -7zCrc.o: ../../7zCrc.c - $(CXX) $(CFLAGS) ../../7zCrc.c - -7zCrcOpt.o: ../../7zCrc.c - $(CXX) $(CFLAGS) ../../7zCrcOpt.c - -7zDec.o: ../../7zDec.c - $(CXX) $(CFLAGS) -D_7ZIP_PPMD_SUPPPORT ../../7zDec.c - -CpuArch.o: ../../CpuArch.c - $(CXX) $(CFLAGS) ../../CpuArch.c - -Delta.o: ../../Delta.c - $(CXX) $(CFLAGS) ../../Delta.c - -LzmaDec.o: ../../LzmaDec.c - $(CXX) $(CFLAGS) ../../LzmaDec.c - -Lzma2Dec.o: ../../Lzma2Dec.c - $(CXX) $(CFLAGS) ../../Lzma2Dec.c - -Bra.o: ../../Bra.c - $(CXX) $(CFLAGS) ../../Bra.c - -Bra86.o: ../../Bra86.c - $(CXX) $(CFLAGS) ../../Bra86.c - -BraIA64.o: ../../BraIA64.c - $(CXX) $(CFLAGS) ../../BraIA64.c - -Bcj2.o: ../../Bcj2.c - $(CXX) $(CFLAGS) ../../Bcj2.c - -Ppmd7.o: ../../Ppmd7.c - $(CXX) $(CFLAGS) ../../Ppmd7.c - -Ppmd7Dec.o: ../../Ppmd7Dec.c - $(CXX) $(CFLAGS) ../../Ppmd7Dec.c - -7zFile.o: ../../7zFile.c - $(CXX) $(CFLAGS) ../../7zFile.c - -7zStream.o: ../../7zStream.c - $(CXX) $(CFLAGS) ../../7zStream.c - -clean: - -$(RM) $(PROG) $(OBJS) +PROG = 7zdec + +LOCAL_FLAGS = -D_7ZIP_PPMD_SUPPPORT + +include ../../../CPP/7zip/LzmaDec_gcc.mak + + +OBJS = \ + $(LZMA_DEC_OPT_OBJS) \ + $O/Bcj2.o \ + $O/Bra.o \ + $O/Bra86.o \ + $O/BraIA64.o \ + $O/CpuArch.o \ + $O/Delta.o \ + $O/Lzma2Dec.o \ + $O/LzmaDec.o \ + $O/Ppmd7.o \ + $O/Ppmd7Dec.o \ + $O/7zCrc.o \ + $O/7zCrcOpt.o \ + $O/Sha256.o \ + $O/Sha256Opt.o \ + $O/7zAlloc.o \ + $O/7zArcIn.o \ + $O/7zBuf.o \ + $O/7zBuf2.o \ + $O/7zDec.o \ + $O/7zMain.o \ + $O/7zFile.o \ + $O/7zStream.o \ + + +include ../../7zip_gcc_c.mak diff --git a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c index 82130e85d..6b4293e33 100644 --- a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c +++ b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c @@ -1,5 +1,5 @@ /* LzmaUtil.c -- Test application for LZMA compression -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-15 : Igor Pavlov : Public domain */ #include "../../Precomp.h" @@ -15,9 +15,9 @@ #include "../../LzmaDec.h" #include "../../LzmaEnc.h" -static const char * const kCantReadMessage = "Can not read input file"; -static const char * const kCantWriteMessage = "Can not write output file"; -static const char * const kCantAllocateMessage = "Can not allocate memory"; +static const char * const kCantReadMessage = "Cannot read input file"; +static const char * const kCantWriteMessage = "Cannot write output file"; +static const char * const kCantAllocateMessage = "Cannot allocate memory"; static const char * const kDataErrorMessage = "Data error"; static void PrintHelp(char *buffer) @@ -37,9 +37,25 @@ static int PrintError(char *buffer, const char *message) return 1; } +static int PrintError_WRes(char *buffer, const char *message, WRes wres) +{ + strcat(buffer, "\nError: "); + strcat(buffer, message); + sprintf(buffer + strlen(buffer), "\nSystem error code: %d", (unsigned)wres); + #ifndef _WIN32 + { + const char *s = strerror(wres); + if (s) + sprintf(buffer + strlen(buffer), " : %s", s); + } + #endif + strcat(buffer, "\n"); + return 1; +} + static int PrintErrorNumber(char *buffer, SRes val) { - sprintf(buffer + strlen(buffer), "\nError code: %x\n", (unsigned)val); + sprintf(buffer + strlen(buffer), "\n7-Zip error code: %d\n", (unsigned)val); return 1; } @@ -181,9 +197,11 @@ static int main2(int numArgs, const char *args[], char *rs) FileSeqInStream_CreateVTable(&inStream); File_Construct(&inStream.file); + inStream.wres = 0; FileOutStream_CreateVTable(&outStream); File_Construct(&outStream.file); + outStream.wres = 0; if (numArgs == 1) { @@ -206,14 +224,19 @@ static int main2(int numArgs, const char *args[], char *rs) return PrintError(rs, "Incorrect UInt32 or UInt64"); } - if (InFile_Open(&inStream.file, args[2]) != 0) - return PrintError(rs, "Can not open input file"); + { + WRes wres = InFile_Open(&inStream.file, args[2]); + if (wres != 0) + return PrintError_WRes(rs, "Cannot open input file", wres); + } if (numArgs > 3) { + WRes wres; useOutFile = True; - if (OutFile_Open(&outStream.file, args[3]) != 0) - return PrintError(rs, "Can not open output file"); + wres = OutFile_Open(&outStream.file, args[3]); + if (wres != 0) + return PrintError_WRes(rs, "Cannot open output file", wres); } else if (encodeMode) PrintUserError(rs); @@ -221,7 +244,9 @@ static int main2(int numArgs, const char *args[], char *rs) if (encodeMode) { UInt64 fileSize; - File_GetLength(&inStream.file, &fileSize); + WRes wres = File_GetLength(&inStream.file, &fileSize); + if (wres != 0) + return PrintError_WRes(rs, "Cannot get file length", wres); res = Encode(&outStream.vt, &inStream.vt, fileSize, rs); } else @@ -240,9 +265,9 @@ static int main2(int numArgs, const char *args[], char *rs) else if (res == SZ_ERROR_DATA) return PrintError(rs, kDataErrorMessage); else if (res == SZ_ERROR_WRITE) - return PrintError(rs, kCantWriteMessage); + return PrintError_WRes(rs, kCantWriteMessage, outStream.wres); else if (res == SZ_ERROR_READ) - return PrintError(rs, kCantReadMessage); + return PrintError_WRes(rs, kCantReadMessage, inStream.wres); return PrintErrorNumber(rs, res); } return 0; diff --git a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc index 12a72bb8b..89b3e11f7 100644 --- a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc +++ b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc @@ -1,44 +1,19 @@ -PROG = lzma -CXX = g++ -LIB = -RM = rm -f -CFLAGS = -c -O2 -Wall -D_7ZIP_ST +PROG = 7lzma -OBJS = \ - LzmaUtil.o \ - Alloc.o \ - LzFind.o \ - LzmaDec.o \ - LzmaEnc.o \ - 7zFile.o \ - 7zStream.o \ - - -all: $(PROG) - -$(PROG): $(OBJS) - $(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) $(LIB2) - -LzmaUtil.o: LzmaUtil.c - $(CXX) $(CFLAGS) LzmaUtil.c - -Alloc.o: ../../Alloc.c - $(CXX) $(CFLAGS) ../../Alloc.c +include ../../../CPP/7zip/LzmaDec_gcc.mak -LzFind.o: ../../LzFind.c - $(CXX) $(CFLAGS) ../../LzFind.c -LzmaDec.o: ../../LzmaDec.c - $(CXX) $(CFLAGS) ../../LzmaDec.c - -LzmaEnc.o: ../../LzmaEnc.c - $(CXX) $(CFLAGS) ../../LzmaEnc.c - -7zFile.o: ../../7zFile.c - $(CXX) $(CFLAGS) ../../7zFile.c - -7zStream.o: ../../7zStream.c - $(CXX) $(CFLAGS) ../../7zStream.c - -clean: - -$(RM) $(PROG) $(OBJS) +OBJS = \ + $(LZMA_DEC_OPT_OBJS) \ + $O/7zFile.o \ + $O/7zStream.o \ + $O/Alloc.o \ + $O/LzFind.o \ + $O/LzFindMt.o \ + $O/LzmaDec.o \ + $O/LzmaEnc.o \ + $O/LzmaUtil.o \ + $O/Threads.o \ + + +include ../../7zip_gcc_c.mak diff --git a/deps/LZMA-SDK/C/Xz.c b/deps/LZMA-SDK/C/Xz.c index 7e061d6e7..d6e2596a9 100644 --- a/deps/LZMA-SDK/C/Xz.c +++ b/deps/LZMA-SDK/C/Xz.c @@ -1,5 +1,5 @@ /* Xz.c - Xz -2017-05-12 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc) unsigned XzFlags_GetCheckSize(CXzStreamFlags f) { unsigned t = XzFlags_GetCheckType(f); - return (t == 0) ? 0 : (4 << ((t - 1) / 3)); + return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3)); } void XzCheck_Init(CXzCheck *p, unsigned mode) diff --git a/deps/LZMA-SDK/C/Xz.h b/deps/LZMA-SDK/C/Xz.h index fad56a3fb..cf9458e39 100644 --- a/deps/LZMA-SDK/C/Xz.h +++ b/deps/LZMA-SDK/C/Xz.h @@ -1,5 +1,5 @@ /* Xz.h - Xz interface -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #ifndef __XZ_H #define __XZ_H @@ -47,7 +47,7 @@ typedef struct CXzFilter filters[XZ_NUM_FILTERS_MAX]; } CXzBlock; -#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) +#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) #define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) @@ -277,7 +277,10 @@ void XzUnpacker_Free(CXzUnpacker *p); { XzUnpacker_Init() for() + { XzUnpacker_Code(); + } + XzUnpacker_IsStreamWasFinished() } Interface-2 : Direct output buffer: @@ -288,7 +291,10 @@ void XzUnpacker_Free(CXzUnpacker *p); XzUnpacker_Init() XzUnpacker_SetOutBufMode(); // to set output buffer and size for() + { XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() + } + XzUnpacker_IsStreamWasFinished() } Interface-3 : Direct output buffer : One call full decoding @@ -296,6 +302,7 @@ void XzUnpacker_Free(CXzUnpacker *p); It uses Interface-2 internally. { XzUnpacker_CodeFull() + XzUnpacker_IsStreamWasFinished() } */ @@ -309,8 +316,12 @@ Returns: SZ_OK status: CODER_STATUS_NOT_FINISHED, - CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams, - call XzUnpacker_IsStreamWasFinished to check that current stream was finished + CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: + 1) it needs more input data to finish current xz stream + 2) xz stream was finished successfully. But the decoder supports multiple + concatented xz streams. So it expects more input data for new xz streams. + Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. + SZ_ERROR_MEM - Memory allocation error SZ_ERROR_DATA - Data error SZ_ERROR_UNSUPPORTED - Unsupported method or method properties @@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ECoderFinishMode finishMode, ECoderStatus *status); +/* +If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() +after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). +*/ + BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); /* -XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes, +XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes, if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. -These bytes can be some bytes after xz archive, or +These bytes can be some data after xz archive, or it can be start of new xz stream. Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of @@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); -/* ---------- Multi Threading Decoding ---------- */ + + + +/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ + +/* + if (CXzDecMtProps::numThreads > 1), the decoder can try to use + Multi-Threading. The decoder analyses xz block header, and if + there are pack size and unpack size values stored in xz block header, + the decoder reads compressed data of block to internal buffers, + and then it can start parallel decoding, if there are another blocks. + The decoder can switch back to Single-Thread decoding after some conditions. + + The sequence of calls for xz decoding with in/out Streams: + { + XzDecMt_Create() + XzDecMtProps_Init(XzDecMtProps) to set default values of properties + // then you can change some XzDecMtProps parameters with required values + // here you can set the number of threads and (memUseMax) - the maximum + Memory usage for multithreading decoding. + for() + { + XzDecMt_Decode() // one call per one file + } + XzDecMt_Destroy() + } +*/ typedef struct { - size_t inBufSize_ST; - size_t outStep_ST; - BoolInt ignoreErrors; + size_t inBufSize_ST; // size of input buffer for Single-Thread decoding + size_t outStep_ST; // size of output buffer for Single-Thread decoding + BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. #ifndef _7ZIP_ST - unsigned numThreads; - size_t inBufSize_MT; - size_t memUseMax; + unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding + size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created + size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. + // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer. #endif } CXzDecMtProps; @@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p); typedef void * CXzDecMtHandle; /* - alloc : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc). + alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). allocMid : for big allocations, aligned allocation is better */ @@ -407,33 +450,46 @@ typedef struct Byte NumStreams_Defined; Byte NumBlocks_Defined; - Byte DataAfterEnd; + Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data - UInt64 InSize; // pack size processed + UInt64 InSize; // pack size processed. That value doesn't include the data after + // end of xz stream, if that data was not correct UInt64 OutSize; UInt64 NumStreams; UInt64 NumBlocks; - SRes DecodeRes; - SRes ReadRes; - SRes ProgressRes; - SRes CombinedRes; - SRes CombinedRes_Type; + SRes DecodeRes; // the error code of xz streams data decoding + SRes ReadRes; // error code from ISeqInStream:Read() + SRes ProgressRes; // error code from ICompressProgress:Progress() + SRes CombinedRes; // Combined result error code that shows main rusult + // = S_OK, if there is no error. + // but check also (DataAfterEnd) that can show additional minor errors. + + SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream + // = SZ_ERROR_PROGRESS, if error from ICompressProgress + // = SZ_ERROR_WRITE, if error from ISeqOutStream + // = SZ_ERROR_* codes for decoding } CXzStatInfo; void XzStatInfo_Clear(CXzStatInfo *p); /* + XzDecMt_Decode() -SRes: - SZ_OK - OK +SRes: it's combined decoding result. It also is equal to stat->CombinedRes. + + SZ_OK - no error + check also output value in (stat->DataAfterEnd) + that can show additional possible error + SZ_ERROR_MEM - Memory allocation error SZ_ERROR_NO_ARCHIVE - is not xz archive SZ_ERROR_ARCHIVE - Headers error SZ_ERROR_DATA - Data Error + SZ_ERROR_UNSUPPORTED - Unsupported method or method properties SZ_ERROR_CRC - CRC Error SZ_ERROR_INPUT_EOF - it needs more input data SZ_ERROR_WRITE - ISeqOutStream error @@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p, // Byte *outBuf, size_t *outBufSize, ISeqInStream *inStream, // const Byte *inData, size_t inDataSize, - CXzStatInfo *stat, - int *isMT, // 0 means that ST (Single-Thread) version was used + CXzStatInfo *stat, // out: decoding results and statistics + int *isMT, // out: 0 means that ST (Single-Thread) version was used + // 1 means that MT (Multi-Thread) version was used ICompressProgress *progress); EXTERN_C_END diff --git a/deps/LZMA-SDK/C/XzCrc64Opt.c b/deps/LZMA-SDK/C/XzCrc64Opt.c index 9273465d4..a0637dd22 100644 --- a/deps/LZMA-SDK/C/XzCrc64Opt.c +++ b/deps/LZMA-SDK/C/XzCrc64Opt.c @@ -1,5 +1,5 @@ /* XzCrc64Opt.c -- CRC64 calculation -2017-06-30 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,6 +9,7 @@ #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; @@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con v = CRC64_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)v ^ *(const UInt32 *)p; + UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; v = (v >> 32) ^ (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) +UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; @@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size v = CRC64_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p; + UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; v = (v << 32) ^ (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] diff --git a/deps/LZMA-SDK/C/XzDec.c b/deps/LZMA-SDK/C/XzDec.c index 4f5327207..d345f68c1 100644 --- a/deps/LZMA-SDK/C/XzDec.c +++ b/deps/LZMA-SDK/C/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp, } +SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc) { CBraState *decoder; @@ -1038,7 +1039,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, (p->outBuf ? NULL : dest), &destLen2, destFinish, src, &srcLen2, srcFinished2, finishMode2); - + *status = p->decoder.status; XzCheck_Update(&p->check, (p->outBuf ? p->outBuf + p->outDataWritten : dest), destLen2); if (!p->outBuf) @@ -1275,9 +1276,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { + const Byte *ptr = p->buf; p->state = XZ_STATE_STREAM_FOOTER; p->pos = 0; - if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf)) + if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr)) return SZ_ERROR_CRC; } break; @@ -1456,7 +1458,6 @@ typedef struct ISeqInStream *inStream; ISeqOutStream *outStream; ICompressProgress *progress; - // CXzStatInfo *stat; BoolInt finishMode; BoolInt outSize_Defined; @@ -1492,8 +1493,9 @@ typedef struct UInt64 numBlocks; // UInt64 numBadBlocks; - SRes mainErrorCode; - + SRes mainErrorCode; // it's set to error code, if the size Code() output doesn't patch the size from Parsing stage + // it can be = SZ_ERROR_INPUT_EOF + // it can be = SZ_ERROR_DATA, in some another cases BoolInt isBlockHeaderState_Parse; BoolInt isBlockHeaderState_Write; UInt64 outProcessed_Parse; @@ -1877,7 +1879,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex) { // if (res == SZ_ERROR_MEM) return res; if (me->props.ignoreErrors && res != SZ_ERROR_MEM) - return S_OK; + return SZ_OK; return res; } } @@ -1898,15 +1900,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, *outCodePos = coder->outCodeSize; *stop = True; + if (srcSize > coder->inPreSize - coder->inCodeSize) + return SZ_ERROR_FAIL; + if (coder->inCodeSize < coder->inPreHeaderSize) { - UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize; - size_t step = srcSize; - if (step > rem) - step = (size_t)rem; + size_t step = coder->inPreHeaderSize - coder->inCodeSize; + if (step > srcSize) + step = srcSize; src += step; srcSize -= step; coder->inCodeSize += step; + *inCodePos = coder->inCodeSize; if (coder->inCodeSize < coder->inPreHeaderSize) { *stop = False; @@ -1956,7 +1961,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, { *inCodePos = coder->inPreSize; *outCodePos = coder->outPreSize; - return S_OK; + return SZ_OK; } return coder->codeRes; } @@ -1966,7 +1971,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, // int srcFinished, BoolInt *needContinue, BoolInt *canRecode) @@ -1985,7 +1990,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (!coder->dec.headerParsedOk || !coder->outBuf) { if (me->finishedDecoderIndex < 0) - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return SZ_OK; } @@ -2077,7 +2082,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (coder->codeRes != SZ_OK) if (!me->props.ignoreErrors) { - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return res; } @@ -2086,7 +2091,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (coder->inPreSize != coder->inCodeSize || coder->blockPackTotal != coder->inCodeSize) { - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return SZ_OK; } @@ -2125,22 +2130,41 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, return SZ_OK; } + /* + We have processed all xz-blocks of stream, + And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where + (src) is a pointer to xz-Index structure. + We finish reading of current xz-Stream, including Zero padding after xz-Stream. + We exit, if we reach extra byte (first byte of new-Stream or another data). + But we don't update input stream pointer for that new extra byte. + If extra byte is not correct first byte of xz-signature, + we have SZ_ERROR_NO_ARCHIVE error here. + */ + res = XzUnpacker_Code(dec, NULL, &outSizeCur, src, &srcProcessed, me->mtc.readWasFinished, // srcFinished CODER_FINISH_END, // CODER_FINISH_ANY, &status); + + // res = SZ_ERROR_ARCHIVE; // for failure test me->status = status; me->codeRes = res; + if (isCross) + me->mtc.crossStart += srcProcessed; + me->mtc.inProcessed += srcProcessed; me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; + srcSize -= srcProcessed; + src += srcProcessed; + if (res != SZ_OK) { - return S_OK; + return SZ_OK; // return res; } @@ -2149,20 +2173,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, *needContinue = True; me->isBlockHeaderState_Parse = False; me->isBlockHeaderState_Write = False; + + if (!isCross) { Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc); if (!crossBuf) return SZ_ERROR_MEM; - memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed); + if (srcSize != 0) + memcpy(crossBuf, src, srcSize); + me->mtc.crossStart = 0; + me->mtc.crossEnd = srcSize; } - me->mtc.crossStart = 0; - me->mtc.crossEnd = srcSize - srcProcessed; + + PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd); + return SZ_OK; } - if (status != CODER_STATUS_NEEDS_MORE_INPUT) + if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0) { - return E_FAIL; + return SZ_ERROR_FAIL; } if (me->mtc.readWasFinished) @@ -2174,7 +2204,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, { size_t inPos; size_t inLim; - const Byte *inData; + // const Byte *inData; UInt64 inProgressPrev = me->mtc.inProcessed; // XzDecMt_Prepare_InBuf_ST(p); @@ -2184,9 +2214,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, inPos = 0; inLim = 0; - // outProcessed = 0; - inData = crossBuf; + // inData = crossBuf; for (;;) { @@ -2201,7 +2230,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, { inPos = 0; inLim = me->mtc.inBufSize; - me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim); + me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim); me->mtc.readProcessed += inLim; if (inLim == 0 || me->mtc.readRes != SZ_OK) me->mtc.readWasFinished = True; @@ -2213,7 +2242,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, res = XzUnpacker_Code(dec, NULL, &outProcessed, - inData + inPos, &inProcessed, + crossBuf + inPos, &inProcessed, (inProcessed == 0), // srcFinished CODER_FINISH_END, &status); @@ -2225,7 +2254,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (res != SZ_OK) { - return S_OK; + return SZ_OK; // return res; } @@ -2240,7 +2269,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, } if (status != CODER_STATUS_NEEDS_MORE_INPUT) - return E_FAIL; + return SZ_ERROR_FAIL; if (me->mtc.progress) { @@ -2276,13 +2305,6 @@ void XzStatInfo_Clear(CXzStatInfo *p) p->NumStreams_Defined = False; p->NumBlocks_Defined = False; - // p->IsArc = False; - // p->UnexpectedEnd = False; - // p->Unsupported = False; - // p->HeadersError = False; - // p->DataError = False; - // p->CrcError = False; - p->DataAfterEnd = False; p->DecodingTruncated = False; @@ -2296,6 +2318,16 @@ void XzStatInfo_Clear(CXzStatInfo *p) +/* + XzDecMt_Decode_ST() can return SZ_OK or the following errors + - SZ_ERROR_MEM for memory allocation error + - error from XzUnpacker_Code() function + - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case + - ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS. + But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors. + ISeqInStream::Read() result is set to p->readRes. + also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. +*/ static SRes XzDecMt_Decode_ST(CXzDecMt *p #ifndef _7ZIP_ST @@ -2384,7 +2416,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p inPos = 0; inLim = p->inBufSize; inData = p->inBuf; - p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim); + p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim); p->readProcessed += inLim; if (inLim == 0 || p->readRes != SZ_OK) p->readWasFinished = True; @@ -2426,8 +2458,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (finished || outProcessed >= outSize) if (outPos != 0) { - size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos); - p->outProcessed += written; + const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos); + // p->outProcessed += written; // 21.01: BUG fixed if (written != outPos) { stat->CombinedRes_Type = SZ_ERROR_WRITE; @@ -2438,9 +2470,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (p->progress && res == SZ_OK) { - UInt64 inDelta = p->inProcessed - inPrev; - UInt64 outDelta = p->outProcessed - outPrev; - if (inDelta >= (1 << 22) || outDelta >= (1 << 22)) + if (p->inProcessed - inPrev >= (1 << 22) || + p->outProcessed - outPrev >= (1 << 22)) { res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed); if (res != SZ_OK) @@ -2455,14 +2486,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p } if (finished) - return res; + { + // p->codeRes is preliminary error from XzUnpacker_Code. + // and it can be corrected later as final result + // so we return SZ_OK here instead of (res); + return SZ_OK; + // return res; + } } } -static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, + + +/* +XzStatInfo_SetStat() transforms + CXzUnpacker return code and status to combined CXzStatInfo results. + it can convert SZ_OK to SZ_ERROR_INPUT_EOF + it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1) +*/ + +static void XzStatInfo_SetStat(const CXzUnpacker *dec, int finishMode, - UInt64 readProcessed, UInt64 inProcessed, - SRes res, ECoderStatus status, + // UInt64 readProcessed, + UInt64 inProcessed, + SRes res, // it's result from CXzUnpacker unpacker + ECoderStatus status, BoolInt decodingTruncated, CXzStatInfo *stat) { @@ -2484,12 +2532,20 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, if (status == CODER_STATUS_NEEDS_MORE_INPUT) { // CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams + // any extra data is part of correct data extraSize = 0; + // if xz stream was not finished, then we need more data if (!XzUnpacker_IsStreamWasFinished(dec)) res = SZ_ERROR_INPUT_EOF; } - else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED) - res = SZ_ERROR_DATA; + else + { + // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding + // so he we have (status == CODER_STATUS_NOT_FINISHED) + // if (status != CODER_STATUS_FINISHED_WITH_MARK) + if (!decodingTruncated || finishMode) + res = SZ_ERROR_DATA; + } } else if (res == SZ_ERROR_NO_ARCHIVE) { @@ -2497,24 +2553,29 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, SZ_ERROR_NO_ARCHIVE is possible for 2 states: XZ_STATE_STREAM_HEADER - if bad signature or bad CRC XZ_STATE_STREAM_PADDING - if non-zero padding data - extraSize / inProcessed don't include "bad" byte + extraSize and inProcessed don't include "bad" byte */ - if (inProcessed != extraSize) // if good streams before error - if (extraSize != 0 || readProcessed != inProcessed) + // if (inProcessed == extraSize), there was no any good xz stream header, and we keep error + if (inProcessed != extraSize) // if there were good xz streams before error + { + // if (extraSize != 0 || readProcessed != inProcessed) { + // he we suppose that all xz streams were finsihed OK, and we have + // some extra data after all streams stat->DataAfterEnd = True; - // there is some good xz stream before. So we set SZ_OK res = SZ_OK; } + } } - stat->DecodeRes = res; + if (stat->DecodeRes == SZ_OK) + stat->DecodeRes = res; stat->InSize -= extraSize; - return res; } + SRes XzDecMt_Decode(CXzDecMtHandle pp, const CXzDecMtProps *props, const UInt64 *outDataSize, int finishMode, @@ -2557,8 +2618,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, p->inProcessed = 0; p->readProcessed = 0; p->readWasFinished = False; + p->readRes = SZ_OK; - p->codeRes = 0; + p->codeRes = SZ_OK; p->status = CODER_STATUS_NOT_SPECIFIED; XzUnpacker_Init(&p->dec); @@ -2589,8 +2651,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, if (p->props.numThreads > 1) { - IMtDecCallback vt; - + IMtDecCallback2 vt; + BoolInt needContinue; + SRes res; // we just free ST buffers here // but we still keep state variables, that was set in XzUnpacker_Init() XzDecMt_FreeSt(p); @@ -2628,45 +2691,45 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, vt.Code = XzDecMt_Callback_Code; vt.Write = XzDecMt_Callback_Write; - { - BoolInt needContinue; - - SRes res = MtDec_Code(&p->mtc); - - stat->InSize = p->mtc.inProcessed; - p->inProcessed = p->mtc.inProcessed; - p->readRes = p->mtc.readRes; - p->readWasFinished = p->mtc.readWasFinished; - p->readProcessed = p->mtc.readProcessed; + res = MtDec_Code(&p->mtc); - tMode = True; - needContinue = False; - if (res == SZ_OK) + stat->InSize = p->mtc.inProcessed; + + p->inProcessed = p->mtc.inProcessed; + p->readRes = p->mtc.readRes; + p->readWasFinished = p->mtc.readWasFinished; + p->readProcessed = p->mtc.readProcessed; + + tMode = True; + needContinue = False; + + if (res == SZ_OK) + { + if (p->mtc.mtProgress.res != SZ_OK) { - if (p->mtc.mtProgress.res != SZ_OK) - { - res = p->mtc.mtProgress.res; - stat->ProgressRes = res; - stat->CombinedRes_Type = SZ_ERROR_PROGRESS; - } - else - needContinue = p->mtc.needContinue; + res = p->mtc.mtProgress.res; + stat->ProgressRes = res; + stat->CombinedRes_Type = SZ_ERROR_PROGRESS; } - - if (!needContinue) + else + needContinue = p->mtc.needContinue; + } + + if (!needContinue) + { { SRes codeRes; BoolInt truncated = False; ECoderStatus status; - CXzUnpacker *dec; + const CXzUnpacker *dec; stat->OutSize = p->outProcessed; if (p->finishedDecoderIndex >= 0) { - CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex]; + const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex]; codeRes = coder->codeRes; dec = &coder->dec; status = coder->status; @@ -2679,41 +2742,46 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, truncated = p->parsing_Truncated; } else - return E_FAIL; + return SZ_ERROR_FAIL; + + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; XzStatInfo_SetStat(dec, p->finishMode, - p->mtc.readProcessed, p->mtc.inProcessed, + // p->mtc.readProcessed, + p->mtc.inProcessed, codeRes, status, truncated, stat); + } - if (res == SZ_OK) + if (res == SZ_OK) + { + stat->ReadRes = p->mtc.readRes; + + if (p->writeRes != SZ_OK) { - if (p->writeRes != SZ_OK) - { - res = p->writeRes; - stat->CombinedRes_Type = SZ_ERROR_WRITE; - } - else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed) - { - res = p->mtc.readRes; - stat->ReadRes = res; - stat->CombinedRes_Type = SZ_ERROR_READ; - } - else if (p->mainErrorCode != SZ_OK) - { - res = p->mainErrorCode; - } + res = p->writeRes; + stat->CombinedRes_Type = SZ_ERROR_WRITE; } - - stat->CombinedRes = res; - if (stat->CombinedRes_Type == SZ_OK) - stat->CombinedRes_Type = res; - return res; + else if (p->mtc.readRes != SZ_OK + // && p->mtc.inProcessed == p->mtc.readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) + { + res = p->mtc.readRes; + stat->CombinedRes_Type = SZ_ERROR_READ; + } + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; } - - PRF_STR("----- decoding ST -----"); + + stat->CombinedRes = res; + if (stat->CombinedRes_Type == SZ_OK) + stat->CombinedRes_Type = res; + return res; } + + PRF_STR("----- decoding ST -----"); } #endif @@ -2729,33 +2797,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, , stat ); + #ifndef _7ZIP_ST + // we must set error code from MT decoding at first + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; + #endif + XzStatInfo_SetStat(&p->dec, p->finishMode, - p->readProcessed, p->inProcessed, + // p->readProcessed, + p->inProcessed, p->codeRes, p->status, False, // truncated stat); + stat->ReadRes = p->readRes; + if (res == SZ_OK) { - /* - if (p->writeRes != SZ_OK) - { - res = p->writeRes; - stat->CombinedRes_Type = SZ_ERROR_WRITE; - } - else - */ - if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed) + if (p->readRes != SZ_OK + // && p->inProcessed == p->readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) { + // we set read error as combined error, only if that error was the reason + // of decoding problem res = p->readRes; - stat->ReadRes = res; stat->CombinedRes_Type = SZ_ERROR_READ; } - #ifndef _7ZIP_ST - else if (p->mainErrorCode != SZ_OK) - res = p->mainErrorCode; - #endif + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; } stat->CombinedRes = res; diff --git a/deps/LZMA-SDK/C/XzEnc.c b/deps/LZMA-SDK/C/XzEnc.c index 309eca949..759ba670e 100644 --- a/deps/LZMA-SDK/C/XzEnc.c +++ b/deps/LZMA-SDK/C/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -36,7 +36,7 @@ #define XzBlock_ClearFlags(p) (p)->flags = 0; -#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1); +#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1)); #define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE; #define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE; @@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p) numBlocks++; if (numBlocks < (unsigned)t2) { - t2r = (unsigned)numBlocks; + t2r = (int)numBlocks; if (t2r == 0) t2r = 1; t3 = t1 * t2r; @@ -751,7 +751,8 @@ static SRes Xz_CompressBlock( } else if (fp->ipDefined) { - SetUi32(filter->props, fp->ip); + Byte *ptr = filter->props; + SetUi32(ptr, fp->ip); filter->propsSize = 4; } } @@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr p->outBufSize = destBlockSize; } - p->mtCoder.numThreadsMax = props->numBlockThreads_Max; + p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; p->mtCoder.expectedDataSize = p->expectedDataSize; RINOK(MtCoder_Code(&p->mtCoder)); diff --git a/deps/LZMA-SDK/C/XzIn.c b/deps/LZMA-SDK/C/XzIn.c index 792a61786..54d81c4a4 100644 --- a/deps/LZMA-SDK/C/XzIn.c +++ b/deps/LZMA-SDK/C/XzIn.c @@ -1,5 +1,5 @@ /* XzIn.c - Xz input -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -152,7 +152,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff { UInt64 indexSize; Byte buf[XZ_STREAM_FOOTER_SIZE]; - UInt64 pos = *startOffset; + UInt64 pos = (UInt64)*startOffset; if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; @@ -202,8 +202,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff if (!XzFlags_IsSupported(p->flags)) return SZ_ERROR_UNSUPPORTED; - if (GetUi32(buf) != CrcCalc(buf + 4, 6)) - return SZ_ERROR_ARCHIVE; + { + /* to eliminate GCC 6.3 warning: + dereferencing type-punned pointer will break strict-aliasing rules */ + const Byte *buf_ptr = buf; + if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6)) + return SZ_ERROR_ARCHIVE; + } indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2; @@ -222,7 +227,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff return SZ_ERROR_ARCHIVE; pos -= (totalSize + XZ_STREAM_HEADER_SIZE); RINOK(LookInStream_SeekTo(stream, pos)); - *startOffset = pos; + *startOffset = (Int64)pos; } { CXzStreamFlags headerFlags; @@ -294,12 +299,12 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr SRes res; Xz_Construct(&st); res = Xz_ReadBackward(&st, stream, startOffset, alloc); - st.startOffset = *startOffset; + st.startOffset = (UInt64)*startOffset; RINOK(res); if (p->num == p->numAllocated) { - size_t newNum = p->num + p->num / 4 + 1; - Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); + const size_t newNum = p->num + p->num / 4 + 1; + void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); if (!data) return SZ_ERROR_MEM; p->numAllocated = newNum; @@ -311,8 +316,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr p->streams[p->num++] = st; if (*startOffset == 0) break; - RINOK(LookInStream_SeekTo(stream, *startOffset)); - if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK) + RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)); + if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) return SZ_ERROR_PROGRESS; } return SZ_OK; diff --git a/deps/LZMA-SDK/C/var_clang.mak b/deps/LZMA-SDK/C/var_clang.mak new file mode 100644 index 000000000..ee265698e --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang.mak @@ -0,0 +1,11 @@ +PLATFORM= +O=b/c +IS_X64= +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM= +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_clang_arm64.mak b/deps/LZMA-SDK/C/var_clang_arm64.mak new file mode 100644 index 000000000..1e82d2eb6 --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang_arm64.mak @@ -0,0 +1,11 @@ +PLATFORM=arm64 +O=b/c_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_clang_x64.mak b/deps/LZMA-SDK/C/var_clang_x64.mak new file mode 100644 index 000000000..d9013e1cd --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang_x64.mak @@ -0,0 +1,12 @@ +PLATFORM=x64 +O=b/c_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 + diff --git a/deps/LZMA-SDK/C/var_clang_x86.mak b/deps/LZMA-SDK/C/var_clang_x86.mak new file mode 100644 index 000000000..9ab916a70 --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang_x86.mak @@ -0,0 +1,12 @@ +PLATFORM=x86 +O=b/c_$(PLATFORM) +IS_X64= +IS_X86=1 +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-m32 +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 + diff --git a/deps/LZMA-SDK/C/var_gcc.mak b/deps/LZMA-SDK/C/var_gcc.mak new file mode 100644 index 000000000..803c8de9d --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc.mak @@ -0,0 +1,12 @@ +PLATFORM= +O=b/g +IS_X64= +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM= +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + +# -march=armv8-a+crc+crypto diff --git a/deps/LZMA-SDK/C/var_gcc_arm64.mak b/deps/LZMA-SDK/C/var_gcc_arm64.mak new file mode 100644 index 000000000..562cfaa6d --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_arm64.mak @@ -0,0 +1,12 @@ +PLATFORM=arm64 +O=b/g_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH=-mtune=cortex-a53 +USE_ASM=1 +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + +# -march=armv8-a+crc+crypto diff --git a/deps/LZMA-SDK/C/var_gcc_x64.mak b/deps/LZMA-SDK/C/var_gcc_x64.mak new file mode 100644 index 000000000..1b965b21b --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_x64.mak @@ -0,0 +1,10 @@ +PLATFORM=x64 +O=b/g_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ diff --git a/deps/LZMA-SDK/C/var_gcc_x86.mak b/deps/LZMA-SDK/C/var_gcc_x86.mak new file mode 100644 index 000000000..9eada64e1 --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_x86.mak @@ -0,0 +1,11 @@ +PLATFORM=x86 +O=b/g_$(PLATFORM) +IS_X64= +IS_X86=1 +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-m32 +USE_ASM=1 +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + diff --git a/deps/LZMA-SDK/C/var_mac_arm64.mak b/deps/LZMA-SDK/C/var_mac_arm64.mak new file mode 100644 index 000000000..0ba414230 --- /dev/null +++ b/deps/LZMA-SDK/C/var_mac_arm64.mak @@ -0,0 +1,11 @@ +PLATFORM=arm64 +O=b/m_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH=-arch arm64 +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_mac_x64.mak b/deps/LZMA-SDK/C/var_mac_x64.mak new file mode 100644 index 000000000..92b15c8b7 --- /dev/null +++ b/deps/LZMA-SDK/C/var_mac_x64.mak @@ -0,0 +1,11 @@ +PLATFORM=x64 +O=b/m_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-arch x86_64 +USE_ASM= +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/warn_clang.mak b/deps/LZMA-SDK/C/warn_clang.mak new file mode 100644 index 000000000..a299fbc4d --- /dev/null +++ b/deps/LZMA-SDK/C/warn_clang.mak @@ -0,0 +1,37 @@ +CFLAGS_WARN_CLANG_3_8_UNIQ = \ + -Wno-reserved-id-macro \ + -Wno-old-style-cast \ + -Wno-c++11-long-long \ + -Wno-unused-macros \ + +CFLAGS_WARN_CLANG_3_8 = \ + $(CFLAGS_WARN_CLANG_3_8_UNIQ) \ + -Weverything \ + -Wno-extra-semi \ + -Wno-sign-conversion \ + -Wno-language-extension-token \ + -Wno-global-constructors \ + -Wno-non-virtual-dtor \ + -Wno-switch-enum \ + -Wno-covered-switch-default \ + -Wno-cast-qual \ + -Wno-padded \ + -Wno-exit-time-destructors \ + -Wno-weak-vtables \ + +CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \ + -Wno-extra-semi-stmt \ + -Wno-zero-as-null-pointer-constant \ + -Wno-deprecated-dynamic-exception-spec \ + -Wno-c++98-compat-pedantic \ + -Wno-atomic-implicit-seq-cst \ + -Wconversion \ + -Wno-sign-conversion \ + +CFLAGS_WARN_1 = \ + -Wno-deprecated-copy-dtor \ + + + + +CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_1) diff --git a/deps/LZMA-SDK/C/warn_clang_mac.mak b/deps/LZMA-SDK/C/warn_clang_mac.mak new file mode 100644 index 000000000..cfbbda073 --- /dev/null +++ b/deps/LZMA-SDK/C/warn_clang_mac.mak @@ -0,0 +1,37 @@ +CFLAGS_WARN_CLANG_3_8_UNIQ = \ + -Wno-reserved-id-macro \ + -Wno-old-style-cast \ + -Wno-c++11-long-long \ + -Wno-unused-macros \ + +CFLAGS_WARN_CLANG_3_8 = \ + $(CFLAGS_WARN_CLANG_3_8_UNIQ) \ + -Weverything \ + -Wno-extra-semi \ + -Wno-sign-conversion \ + -Wno-language-extension-token \ + -Wno-global-constructors \ + -Wno-non-virtual-dtor \ + -Wno-switch-enum \ + -Wno-covered-switch-default \ + -Wno-cast-qual \ + -Wno-padded \ + -Wno-exit-time-destructors \ + -Wno-weak-vtables \ + +CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \ + -Wno-extra-semi-stmt \ + -Wno-zero-as-null-pointer-constant \ + -Wno-deprecated-dynamic-exception-spec \ + -Wno-c++98-compat-pedantic \ + -Wno-atomic-implicit-seq-cst \ + -Wconversion \ + -Wno-sign-conversion \ + +CFLAGS_WARN_MAC = \ + -Wno-poison-system-directories \ + -Wno-c++11-long-long \ + -Wno-atomic-implicit-seq-cst \ + + +CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_MAC) diff --git a/deps/LZMA-SDK/C/warn_gcc.mak b/deps/LZMA-SDK/C/warn_gcc.mak new file mode 100644 index 000000000..3ae796480 --- /dev/null +++ b/deps/LZMA-SDK/C/warn_gcc.mak @@ -0,0 +1,53 @@ +CFLAGS_WARN_GCC_4_5 = \ + +CFLAGS_WARN_GCC_6 = \ + -Waddress \ + -Waggressive-loop-optimizations \ + -Wattributes \ + -Wbool-compare \ + -Wcast-align \ + -Wcomment \ + -Wdiv-by-zero \ + -Wduplicated-cond \ + -Wformat-contains-nul \ + -Winit-self \ + -Wint-to-pointer-cast \ + -Wunused \ + -Wunused-macros \ + +# -Wno-strict-aliasing + +CFLAGS_WARN_GCC_9 = \ + -Waddress \ + -Waddress-of-packed-member \ + -Waggressive-loop-optimizations \ + -Wattributes \ + -Wbool-compare \ + -Wbool-operation \ + -Wcast-align \ + -Wcast-align=strict \ + -Wcomment \ + -Wdangling-else \ + -Wdiv-by-zero \ + -Wduplicated-branches \ + -Wduplicated-cond \ + -Wformat-contains-nul \ + -Wimplicit-fallthrough=5 \ + -Winit-self \ + -Wint-in-bool-context \ + -Wint-to-pointer-cast \ + -Wunused \ + -Wunused-macros \ + -Wconversion \ + +# -Wno-sign-conversion \ + +CFLAGS_WARN_GCC_PPMD_UNALIGNED = \ + -Wno-strict-aliasing \ + + +CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \ + +# $(CFLAGS_WARN_GCC_PPMD_UNALIGNED) + + \ No newline at end of file diff --git a/deps/LZMA-SDK/DOC/7zFormat.txt b/deps/LZMA-SDK/DOC/7zFormat.txt new file mode 100644 index 000000000..9239e9355 --- /dev/null +++ b/deps/LZMA-SDK/DOC/7zFormat.txt @@ -0,0 +1,469 @@ +7z Format description (18.06) +---------------------------- + +This file contains description of 7z archive format. +7z archive can contain files compressed with any method. +See "Methods.txt" for description for defined compressing methods. + + +Format structure Overview +------------------------- + +Some fields can be optional. + +Archive structure +~~~~~~~~~~~~~~~~~ +SignatureHeader +[PackedStreams] +[PackedStreamsForHeaders] +[ + Header + or + { + Packed Header + HeaderInfo + } +] + + + +Header structure +~~~~~~~~~~~~~~~~ +{ + ArchiveProperties + AdditionalStreams + { + PackInfo + { + PackPos + NumPackStreams + Sizes[NumPackStreams] + CRCs[NumPackStreams] + } + CodersInfo + { + NumFolders + Folders[NumFolders] + { + NumCoders + CodersInfo[NumCoders] + { + ID + NumInStreams; + NumOutStreams; + PropertiesSize + Properties[PropertiesSize] + } + NumBindPairs + BindPairsInfo[NumBindPairs] + { + InIndex; + OutIndex; + } + PackedIndices + } + UnPackSize[Folders][Folders.NumOutstreams] + CRCs[NumFolders] + } + SubStreamsInfo + { + NumUnPackStreamsInFolders[NumFolders]; + UnPackSizes[] + CRCs[] + } + } + MainStreamsInfo + { + (Same as in AdditionalStreams) + } + FilesInfo + { + NumFiles + Properties[] + { + ID + Size + Data + } + } +} + +HeaderInfo structure +~~~~~~~~~~~~~~~~~~~~ +{ + (Same as in AdditionalStreams) +} + + + +Notes about Notation and encoding +--------------------------------- + +7z uses little endian encoding. + +7z archive format has optional headers that are marked as +[] +Header +[] + +REAL_UINT64 means real UINT64. + +UINT64 means real UINT64 encoded with the following scheme: + + Size of encoding sequence depends from first byte: + First_Byte Extra_Bytes Value + (binary) + 0xxxxxxx : ( xxxxxxx ) + 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y + 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y + ... + 1111110x BYTE y[6] : ( x << (8 * 6)) + y + 11111110 BYTE y[7] : y + 11111111 BYTE y[8] : y + + + +Property IDs +------------ + +0x00 = kEnd + +0x01 = kHeader + +0x02 = kArchiveProperties + +0x03 = kAdditionalStreamsInfo +0x04 = kMainStreamsInfo +0x05 = kFilesInfo + +0x06 = kPackInfo +0x07 = kUnPackInfo +0x08 = kSubStreamsInfo + +0x09 = kSize +0x0A = kCRC + +0x0B = kFolder + +0x0C = kCodersUnPackSize +0x0D = kNumUnPackStream + +0x0E = kEmptyStream +0x0F = kEmptyFile +0x10 = kAnti + +0x11 = kName +0x12 = kCTime +0x13 = kATime +0x14 = kMTime +0x15 = kWinAttributes +0x16 = kComment + +0x17 = kEncodedHeader + +0x18 = kStartPos +0x19 = kDummy + + +7z format headers +----------------- + +SignatureHeader +~~~~~~~~~~~~~~~ + BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + + ArchiveVersion + { + BYTE Major; // now = 0 + BYTE Minor; // now = 4 + }; + + UINT32 StartHeaderCRC; + + StartHeader + { + REAL_UINT64 NextHeaderOffset + REAL_UINT64 NextHeaderSize + UINT32 NextHeaderCRC + } + + +........................... + + +ArchiveProperties +~~~~~~~~~~~~~~~~~ +BYTE NID::kArchiveProperties (0x02) +for (;;) +{ + BYTE PropertyType; + if (aType == 0) + break; + UINT64 PropertySize; + BYTE PropertyData[PropertySize]; +} + + +Digests (NumStreams) +~~~~~~~~~~~~~~~~~~~~~ + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumStreams) + BIT Defined + } + UINT32 CRCs[NumDefined] + + +PackInfo +~~~~~~~~~~~~ + BYTE NID::kPackInfo (0x06) + UINT64 PackPos + UINT64 NumPackStreams + + [] + BYTE NID::kSize (0x09) + UINT64 PackSizes[NumPackStreams] + [] + + [] + BYTE NID::kCRC (0x0A) + PackStreamDigests[NumPackStreams] + [] + + BYTE NID::kEnd + + +Folder +~~~~~~ + UINT64 NumCoders; + for (NumCoders) + { + BYTE + { + 0:3 CodecIdSize + 4: Is Complex Coder + 5: There Are Attributes + 6: Reserved + 7: There are more alternative methods. (Not used anymore, must be 0). + } + BYTE CodecId[CodecIdSize] + if (Is Complex Coder) + { + UINT64 NumInStreams; + UINT64 NumOutStreams; + } + if (There Are Attributes) + { + UINT64 PropertiesSize + BYTE Properties[PropertiesSize] + } + } + + NumBindPairs = NumOutStreamsTotal - 1; + + for (NumBindPairs) + { + UINT64 InIndex; + UINT64 OutIndex; + } + + NumPackedStreams = NumInStreamsTotal - NumBindPairs; + if (NumPackedStreams > 1) + for(NumPackedStreams) + { + UINT64 Index; + }; + + + + +Coders Info +~~~~~~~~~~~ + + BYTE NID::kUnPackInfo (0x07) + + + BYTE NID::kFolder (0x0B) + UINT64 NumFolders + BYTE External + switch(External) + { + case 0: + Folders[NumFolders] + case 1: + UINT64 DataStreamIndex + } + + + BYTE ID::kCodersUnPackSize (0x0C) + for(Folders) + for(Folder.NumOutStreams) + UINT64 UnPackSize; + + + [] + BYTE NID::kCRC (0x0A) + UnPackDigests[NumFolders] + [] + + + + BYTE NID::kEnd + + + +SubStreams Info +~~~~~~~~~~~~~~ + BYTE NID::kSubStreamsInfo; (0x08) + + [] + BYTE NID::kNumUnPackStream; (0x0D) + UINT64 NumUnPackStreamsInFolders[NumFolders]; + [] + + + [] + BYTE NID::kSize (0x09) + UINT64 UnPackSizes[] + [] + + + [] + BYTE NID::kCRC (0x0A) + Digests[Number of streams with unknown CRC] + [] + + + BYTE NID::kEnd + + +Streams Info +~~~~~~~~~~~~ + + [] + PackInfo + [] + + + [] + CodersInfo + [] + + + [] + SubStreamsInfo + [] + + BYTE NID::kEnd + + +FilesInfo +~~~~~~~~~ + BYTE NID::kFilesInfo; (0x05) + UINT64 NumFiles + + for (;;) + { + BYTE PropertyType; + if (aType == 0) + break; + + UINT64 Size; + + switch(PropertyType) + { + kEmptyStream: (0x0E) + for(NumFiles) + BIT IsEmptyStream + + kEmptyFile: (0x0F) + for(EmptyStreams) + BIT IsEmptyFile + + kAnti: (0x10) + for(EmptyStreams) + BIT IsAntiFile + + case kCTime: (0x12) + case kATime: (0x13) + case kMTime: (0x14) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT TimeDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Definded Items) + REAL_UINT64 Time + [] + + kNames: (0x11) + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Files) + { + wchar_t Names[NameSize]; + wchar_t 0; + } + [] + + kAttributes: (0x15) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT AttributesAreDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Definded Attributes) + UINT32 Attributes + [] + } + } + + +Header +~~~~~~ + BYTE NID::kHeader (0x01) + + [] + ArchiveProperties + [] + + [] + BYTE NID::kAdditionalStreamsInfo; (0x03) + StreamsInfo + [] + + [] + BYTE NID::kMainStreamsInfo; (0x04) + StreamsInfo + [] + + [] + FilesInfo + [] + + BYTE NID::kEnd + + +HeaderInfo +~~~~~~~~~~ + [] + BYTE NID::kEncodedHeader; (0x17) + StreamsInfo for Encoded Header + [] + + +--- +End of document diff --git a/deps/LZMA-SDK/DOC/Methods.txt b/deps/LZMA-SDK/DOC/Methods.txt new file mode 100644 index 000000000..6d0641bae --- /dev/null +++ b/deps/LZMA-SDK/DOC/Methods.txt @@ -0,0 +1,173 @@ +7-Zip method IDs for 7z and xz archives +--------------------------------------- + +Version: 18.06 +Date: 2018-06-30 + +Each compression or crypto method in 7z is associated with unique binary value (ID). +The length of ID in bytes is arbitrary but it can not exceed 63 bits (8 bytes). + +xz and 7z formats use same ID map. + +If you want to add some new ID, you have two ways: + 1) Write request for allocating IDs to 7-Zip developers. + 2) Generate 8-bytes ID: + + 3F ZZ ZZ ZZ ZZ ZZ MM MM + + 3F - Prefix for random IDs (1 byte) + ZZ ZZ ZZ ZZ ZZ - Developer ID (5 bytes). Use real random bytes. + + MM MM - Method ID (2 bytes) + + You can notify 7-Zip developers about your Developer ID / Method ID. + + Note: Use new ID, if old codec can not decode data encoded with new version. + + +List of defined IDs +------------------- + +00 - Copy + +03 - Delta +04 - BCJ (x86) +05 - PPC (big-endian) +06 - IA64 +07 - ARM (little-endian) +08 - ARMT (little-endian) +09 - SPARC + +21 - LZMA2 + +02.. - Common + 03 [Swap] + - 2 Swap2 + - 4 Swap4 + +03.. - 7z + 01 - + 01 - LZMA + + 03 - [Branch Codecs] + 01 - [x86 Codecs] + 03 - BCJ + 1B - BCJ2 (4 packed streams) + 02 - + 05 - PPC (big-endian) + 03 - + 01 - Alpha + 04 - + 01 - IA64 + 05 - + 01 - ARM (little-endian) + 06 - + 05 - M68 (big-endian) + 07 - + 01 - ARMT (little-endian) + 08 - + 05 - SPARC + + 04 - + 01 - PPMD + + 7F - + 01 - experimental method. + + +04.. - Misc codecs + + 00 - Reserved + + 01 - [Zip] + 00 - Copy (not used. Use {00} instead) + 01 - Shrink + 06 - Implode + 08 - Deflate + 09 - Deflate64 + 0A - Imploding + 0C - BZip2 (not used. Use {040202} instead) + 0E - LZMA (LZMA-zip) + 5F - xz + 60 - Jpeg + 61 - WavPack + 62 - PPMd (PPMd-zip) + 63 - wzAES + + 02 - + 02 - BZip2 + + 03 - [Rar] + 01 - Rar1 + 02 - Rar2 + 03 - Rar3 + 05 - Rar5 + + 04 - [Arj] + 01 - Arj(1,2,3) + 02 - Arj4 + + 05 - [Z] + + 06 - [Lzh] + + 07 - Reserved for 7z + + 08 - [Cab] + + 09 - [NSIS] + 01 - DeflateNSIS + 02 - BZip2NSIS + + F7 - External codecs (that are not included to 7-Zip) + + 0x xx - reserved + + 10 xx - reserved (LZHAM) + 01 - LZHAM + + 11 xx - reserved (Tino Reichardt) + 01 - ZSTD + 02 - BROTLI + 04 - LZ4 + 05 - LZ5 + 06 - LIZARD + + 12 xx - reserverd (Denis Anisimov) + + 01 - WavPack2 + FE - eSplitter + FF - RawSplitter + + +06.. - Crypto + + F0 - Ciphers without hashing algo + + 01 - [AES] + 0x - AES-128 + 4x - AES-192 + 8x - AES-256 + Cx - AES + + x0 - ECB + x1 - CBC + x2 - CFB + x3 - OFB + x4 - CTR + + F1 - Combine Ciphers + + 01 - [Zip] + 01 - ZipCrypto (Main Zip crypto algo) + + 03 - [RAR] + 02 - + 03 - Rar29AES (AES-128 + modified SHA-1) + + 07 - [7z] + 01 - 7zAES (AES-256 + SHA-256) + + +--- +End of document diff --git a/deps/LZMA-SDK/DOC/installer.txt b/deps/LZMA-SDK/DOC/installer.txt new file mode 100644 index 000000000..70ad7dc6a --- /dev/null +++ b/deps/LZMA-SDK/DOC/installer.txt @@ -0,0 +1,166 @@ +7-Zip for installers 9.38 +------------------------- + +7-Zip is a file archiver for Windows NT/2000/2003/2008/XP/Vista/7/8/10. + +7-Zip for installers is part of LZMA SDK. +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +It's allowed to join 7-Zip SFX module with another software. +It's allowed to change resources of 7-Zip's SFX modules. + + +HOW to use +----------- + +7zr.exe is reduced version of 7za.exe of 7-Zip. +7zr.exe supports only format with these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, Copy. + +Example of compressing command for installation packages: + +7zr a archive.7z files + +7zSD.sfx is SFX module for installers. 7zSD.sfx uses msvcrt.dll. + +SFX modules for installers allow to create installation program. +Such module extracts archive to temp folder and then runs specified program and removes +temp files after program finishing. Self-extract archive for installers must be created +as joining 3 files: SFX_Module, Installer_Config, 7z_Archive. +Installer_Config is optional file. You can use the following command to create installer +self-extract archive: + +copy /b 7zSD.sfx + config.txt + archive.7z archive.exe + +The smallest installation package size can be achieved, if installation files was +uncompressed before including to 7z archive. + +-y switch for installer module (at runtime) specifies quiet mode for extracting. + +Installer Config file format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Config file contains commands for Installer. File begins from string +;!@Install@!UTF-8! and ends with ;!@InstallEnd@!. File must be written +in UTF-8 encoding. File contains string pairs: + +ID_String="Value" + +ID_String Description + +Title Title for messages +BeginPrompt Begin Prompt message +Progress Value can be "yes" or "no". Default value is "yes". +RunProgram Command for executing. Default value is "setup.exe". + Substring %%T will be replaced with path to temporary + folder, where files were extracted +Directory Directory prefix for "RunProgram". Default value is ".\\" +ExecuteFile Name of file for executing +ExecuteParameters Parameters for "ExecuteFile" + + +You can omit any string pair. + +There are two ways to run program: RunProgram and ExecuteFile. +Use RunProgram, if you want to run some program from .7z archive. +Use ExecuteFile, if you want to open some document from .7z archive or +if you want to execute some command from Windows. + +If you use RunProgram and if you specify empty directory prefix: Directory="", +the system searches for the executable file in the following sequence: + +1. The directory from which the application (installer) loaded. +2. The temporary folder, where files were extracted. +3. The Windows system directory. + + +Config file Examples +~~~~~~~~~~~~~~~~~~~~ + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +RunProgram="setup.exe" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +ExecuteFile="7zip.msi" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.01 Update" +BeginPrompt="Do you want to install the 7-Zip 4.01 Update?" +ExecuteFile="msiexec.exe" +ExecuteParameters="/i 7zip.msi REINSTALL=ALL REINSTALLMODE=vomus" +;!@InstallEnd@! + + + +Small SFX modules for installers +-------------------------------- + +7zS2.sfx - small SFX module (GUI version) +7zS2con.sfx - small SFX module (Console version) + +Small SFX modules support this codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, COPY + +Small SFX module is similar to common SFX module for installers. +The difference (what's new in small version): + - Smaller size (30 KB vs 100 KB) + - C source code instead of Ñ++ + - No installer Configuration file + - No extracting progress window + - It decompresses solid 7z blocks (it can be whole 7z archive) to RAM. + So user that calls SFX installer must have free RAM of size of largest + solid 7z block (size of 7z archive at simplest case). + +How to use +---------- + +copy /b 7zS2.sfx + archive.7z sfx.exe + +When you run installer sfx module (sfx.exe) +1) It creates "7zNNNNNNNN" temp folder in system temp folder. +2) It extracts .7z archive to that folder +3) It executes one file from "7zNNNNNNNN" temp folder. +4) It removes "7zNNNNNNNN" temp folder + +You can send parameters to installer, and installer will transfer them to extracted .exe file. + +Small SFX uses 3 levels of priorities to select file to execute: + + 1) Files in root folder have higher priority than files in subfolders. + 2) File extension priorities (from high to low priority order): + bat, cmd, exe, inf, msi, cab (under Windows CE), html, htm + 3) File name priorities (from high to low priority order): + setup, install, run, start + +Windows CE (ARM) version of 7zS2.sfx is included to 7-Zip for Windows Mobile package. + + +Examples +-------- + +1) To create compressed console 7-Zip: + +7zr a c.7z 7z.exe 7z.dll -mx +copy /b 7zS2con.sfx + c.7z 7zCompr.exe +7zCompr.exe b -md22 + + +2) To create compressed GUI 7-Zip: + +7zr a g.7z 7zg.exe 7z.dll -mx +copy /b 7zS2.sfx + g.7z 7zgCompr.exe +7zgCompr.exe b -md22 + + +3) To open some file: + +7zr a h.7z readme.txt -mx +copy /b 7zS2.sfx + h.7z 7zTxt.exe +7zTxt.exe diff --git a/deps/LZMA-SDK/DOC/lzma-history.txt b/deps/LZMA-SDK/DOC/lzma-history.txt new file mode 100644 index 000000000..3fc19fd8b --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-history.txt @@ -0,0 +1,484 @@ +HISTORY of the LZMA SDK +----------------------- + +21.02 alpha 2021-05-06 +------------------------- +- The command line version of 7-Zip for macOS was released. +- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux + was increased by 20%-60%. + + +21.01 alpha 2021-03-09 +------------------------- +- The command line version of 7-Zip for Linux was released. +- The improvements for speed of ARM64 version using hardware CPU instructions + for AES, CRC-32, SHA-1 and SHA-256. +- Some bugs were fixed. + + +20.02 alpha 2020-08-08 +------------------------- +- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64. + It allows to increase the compression speed for big 7z archives, if there is a big number + of CPU cores and threads. +- The speed of PPMd compressing/decompressing was increased for 7z archives. +- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system + to modify "Last Access Time" property of source files for archiving and hashing operations. +- Some bugs were fixed. + + +20.00 alpha 2020-02-06 +------------------------- +- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, + that can work faster than bt4 and hc4 match finders for the data with big redundancy. +- The compression ratio was improved for Fast and Fastest compression levels with the + following default settings: + - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary. + - Fast level (-mx3) : hc5 match finder with 4 MB dictionary. +- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra + compression levels. + + +19.00 2019-02-21 +------------------------- +- Encryption strength for 7z archives was increased: + the size of random initialization vector was increased from 64-bit to 128-bit, + and the pseudo-random number generator was improved. +- The bug in 7zIn.c code was fixed. + + +18.06 2018-12-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased by 3-10%, + and there are minor changes in compression ratio. +- Some bugs were fixed. +- The bug in 7-Zip 18.02-18.05 was fixed: + There was memory leak in multithreading xz decoder - XzDecMt_Decode(), + if xz stream contains only one block. +- The changes for MSVS compiler makefiles: + - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64) + instead of "CPU" macroname with values (AMD64, ARM64). + - the makefiles by default now use static version of the run-time library. + + +18.05 2018-04-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and + by 3% for normal/maximum compression levels. +- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in + Windows 10 because of some BUG with "Large Pages" in Windows 10. + Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). +- The BUG was fixed in Lzma2Enc.c + Lzma2Enc_Encode2() function worked incorretly, + if (inStream == NULL) and the number of block threads is more than 1. + + +18.03 beta 2018-03-04 +------------------------- +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm + for x64 with about 30% higher speed than main version of LZMA decoder written in C. +- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. +- 7-Zip now can use multi-threading for 7z/LZMA2 decoding, + if there are multiple independent data chunks in LZMA2 stream. +- 7-Zip now can use multi-threading for xz decoding, + if there are multiple blocks in xz stream. + + +18.01 2019-01-28 +------------------------- +- The BUG in 17.01 - 18.00 beta was fixed: + XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished() + didn't work correctly for xz archives without checksum (CRC). + + +18.00 beta 2019-01-10 +------------------------- +- The BUG in xz encoder was fixed: + There was memory leak of 16 KB for each file compressed with + xz compression method, if additional filter was used. + + +17.01 beta 2017-08-28 +------------------------- +- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression. + 7-Zip now uses additional memory buffers for multi-block LZMA2 compression. + CPU utilization was slightly improved. +- 7-zip now creates multi-block xz archives by default. Block size can be + specified with -ms[Size]{m|g} switch. +- xz decoder now can unpack random block from multi-block xz archives. +- 7-Zip command line: @listfile now doesn't work after -- switch. + Use -i@listfile before -- switch instead. +- The BUGs were fixed: + 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive. + + +17.00 beta 2017-04-29 +------------------------- +- NewHandler.h / NewHandler.cpp: + now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900). +- C/7zTypes.h : the names of variables in interface structures were changed (vt). +- Some bugs were fixed. 7-Zip could crash in some cases. +- Some internal changes in code. + + +16.04 2016-10-04 +------------------------- +- The bug was fixed in DllSecur.c. + + +16.03 2016-09-28 +------------------------- +- SFX modules now use some protection against DLL preloading attack. +- Some bugs in 7z code were fixed. + + +16.02 2016-05-21 +------------------------- +- The BUG in 16.00 - 16.01 was fixed: + Split Handler (SplitHandler.cpp) returned incorrect + total size value (kpidSize) for split archives. + + +16.01 2016-05-19 +------------------------- +- Some internal changes to reduce the number of compiler warnings. + + +16.00 2016-05-10 +------------------------- +- Some bugs were fixed. + + +15.12 2015-11-19 +------------------------- +- The BUG in C version of 7z decoder was fixed: + 7zDec.c : SzDecodeLzma2() + 7z decoder could mistakenly report about decoding error for some 7z archives + that use LZMA2 compression method. + The probability to get that mistaken decoding error report was about + one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). +- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed: + 7zArcIn.c : SzReadHeader2() + 7z decoder worked incorrectly for 7z archives that contain + empty solid blocks, that can be placed to 7z archive, if some file is + unavailable for reading during archive creation. + + +15.09 beta 2015-10-16 +------------------------- +- The BUG in LZMA / LZMA2 encoding code was fixed. + The BUG in LzFind.c::MatchFinder_ReadBlock() function. + If input data size is larger than (4 GiB - dictionary_size), + the following code worked incorrectly: + - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions + for compressing from memory to memory. + That BUG is not related to LZMA encoder version that works via streams. + - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if + default value of chunk size (CLzma2EncProps::blockSize) is changed + to value larger than (4 GiB - dictionary_size). + + +9.38 beta 2015-01-03 +------------------------- +- The BUG in 9.31-9.37 was fixed: + IArchiveGetRawProps interface was disabled for 7z archives. +- The BUG in 9.26-9.36 was fixed: + Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows. + + +9.36 beta 2014-12-26 +------------------------- +- The BUG in command line version was fixed: + 7-Zip created temporary archive in current folder during update archive + operation, if -w{Path} switch was not specified. + The fixed 7-Zip creates temporary archive in folder that contains updated archive. +- The BUG in 9.33-9.35 was fixed: + 7-Zip silently ignored file reading errors during 7z or gz archive creation, + and the created archive contained only part of file that was read before error. + The fixed 7-Zip stops archive creation and it reports about error. + + +9.35 beta 2014-12-07 +------------------------- +- 7zr.exe now support AES encryption. +- SFX mudules were added to LZMA SDK +- Some bugs were fixed. + + +9.21 beta 2011-04-11 +------------------------- +- New class FString for file names at file systems. +- Speed optimization in CRC code for big-endian CPUs. +- The BUG in Lzma2Dec.c was fixed: + Lzma2Decode function didn't work. + + +9.18 beta 2010-11-02 +------------------------- +- New small SFX module for installers (SfxSetup). + + +9.12 beta 2010-03-24 +------------------------- +- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work, + if more than 10 threads were used (or more than 20 threads in some modes). + + +9.11 beta 2010-03-15 +------------------------- +- PPMd compression method support + + +9.09 2009-12-12 +------------------------- +- The bug was fixed: + Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c + incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8. +- Some bugs were fixed + + +9.06 2009-08-17 +------------------------- +- Some changes in ANSI-C 7z Decoder interfaces. + + +9.04 2009-05-30 +------------------------- +- LZMA2 compression method support +- xz format support + + +4.65 2009-02-03 +------------------------- +- Some minor fixes + + +4.63 2008-12-31 +------------------------- +- Some minor fixes + + +4.61 beta 2008-11-23 +------------------------- +- The bug in ANSI-C LZMA Decoder was fixed: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. +- Some changes in ANSI-C 7z Decoder interfaces. +- LZMA SDK is placed in the public domain. + + +4.60 beta 2008-08-19 +------------------------- +- Some minor fixes. + + +4.59 beta 2008-08-13 +------------------------- +- The bug was fixed: + LZMA Encoder in fast compression mode could access memory outside of + allocated range in some rare cases. + + +4.58 beta 2008-05-05 +------------------------- +- ANSI-C LZMA Decoder was rewritten for speed optimizations. +- ANSI-C LZMA Encoder was included to LZMA SDK. +- C++ LZMA code now is just wrapper over ANSI-C code. + + +4.57 2007-12-12 +------------------------- +- Speed optimizations in Ñ++ LZMA Decoder. +- Small changes for more compatibility with some C/C++ compilers. + + +4.49 beta 2007-07-05 +------------------------- +- .7z ANSI-C Decoder: + - now it supports BCJ and BCJ2 filters + - now it supports files larger than 4 GB. + - now it supports "Last Write Time" field for files. +- C++ code for .7z archives compressing/decompressing from 7-zip + was included to LZMA SDK. + + +4.43 2006-06-04 +------------------------- +- Small changes for more compatibility with some C/C++ compilers. + + +4.42 2006-05-15 +------------------------- +- Small changes in .h files in ANSI-C version. + + +4.39 beta 2006-04-14 +------------------------- +- The bug in versions 4.33b:4.38b was fixed: + C++ version of LZMA encoder could not correctly compress + files larger than 2 GB with HC4 match finder (-mfhc4). + + +4.37 beta 2005-04-06 +------------------------- +- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. + + +4.35 beta 2005-03-02 +------------------------- +- The bug was fixed in C++ version of LZMA Decoder: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. + + +4.34 beta 2006-02-27 +------------------------- +- Compressing speed and memory requirements for compressing were increased +- LZMA now can use only these match finders: HC4, BT2, BT3, BT4 + + +4.32 2005-12-09 +------------------------- +- Java version of LZMA SDK was included + + +4.30 2005-11-20 +------------------------- +- Compression ratio was improved in -a2 mode +- Speed optimizations for compressing in -a2 mode +- -fb switch now supports values up to 273 +- The bug in 7z_C (7zIn.c) was fixed: + It used Alloc/Free functions from different memory pools. + So if program used two memory pools, it worked incorrectly. +- 7z_C: .7z format supporting was improved +- LZMA# SDK (C#.NET version) was included + + +4.27 (Updated) 2005-09-21 +------------------------- +- Some GUIDs/interfaces in C++ were changed. + IStream.h: + ISequentialInStream::Read now works as old ReadPart + ISequentialOutStream::Write now works as old WritePart + + +4.27 2005-08-07 +------------------------- +- The bug in LzmaDecodeSize.c was fixed: + if _LZMA_IN_CB and _LZMA_OUT_READ were defined, + decompressing worked incorrectly. + + +4.26 2005-08-05 +------------------------- +- Fixes in 7z_C code and LzmaTest.c: + previous versions could work incorrectly, + if malloc(0) returns 0 + + +4.23 2005-06-29 +------------------------- +- Small fixes in C++ code + + +4.22 2005-06-10 +------------------------- +- Small fixes + + +4.21 2005-06-08 +------------------------- +- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed +- New additional version of ANSI-C LZMA Decoder with zlib-like interface: + - LzmaStateDecode.h + - LzmaStateDecode.c + - LzmaStateTest.c +- ANSI-C LZMA Decoder now can decompress files larger than 4 GB + + +4.17 2005-04-18 +------------------------- +- New example for RAM->RAM compressing/decompressing: + LZMA + BCJ (filter for x86 code): + - LzmaRam.h + - LzmaRam.cpp + - LzmaRamDecode.h + - LzmaRamDecode.c + - -f86 switch for lzma.exe + + +4.16 2005-03-29 +------------------------- +- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): + If _LZMA_OUT_READ was defined, and if encoded stream was corrupted, + decoder could access memory outside of allocated range. +- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster). + Old version of LZMA Decoder now is in file LzmaDecodeSize.c. + LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c +- Small speed optimization in LZMA C++ code +- filter for SPARC's code was added +- Simplified version of .7z ANSI-C Decoder was included + + +4.06 2004-09-05 +------------------------- +- The bug in v4.05 was fixed: + LZMA-Encoder didn't release output stream in some cases. + + +4.05 2004-08-25 +------------------------- +- Source code of filters for x86, IA-64, ARM, ARM-Thumb + and PowerPC code was included to SDK +- Some internal minor changes + + +4.04 2004-07-28 +------------------------- +- More compatibility with some C++ compilers + + +4.03 2004-06-18 +------------------------- +- "Benchmark" command was added. It measures compressing + and decompressing speed and shows rating values. + Also it checks hardware errors. + + +4.02 2004-06-10 +------------------------- +- C++ LZMA Encoder/Decoder code now is more portable + and it can be compiled by GCC on Linux. + + +4.01 2004-02-15 +------------------------- +- Some detection of data corruption was enabled. + LzmaDecode.c / RangeDecoderReadByte + ..... + { + rd->ExtraBytes = 1; + return 0xFF; + } + + +4.00 2004-02-13 +------------------------- +- Original version of LZMA SDK + + + +HISTORY of the LZMA +------------------- + 2001-2008: Improvements to LZMA compressing/decompressing code, + keeping compatibility with original LZMA format + 1996-2001: Development of LZMA compression format + + Some milestones: + + 2001-08-30: LZMA compression was added to 7-Zip + 1999-01-02: First version of 7-Zip was released + + +End of document diff --git a/deps/LZMA-SDK/DOC/lzma-sdk.txt b/deps/LZMA-SDK/DOC/lzma-sdk.txt new file mode 100644 index 000000000..b11716938 --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-sdk.txt @@ -0,0 +1,357 @@ +LZMA SDK 21.02 +-------------- + +LZMA SDK provides the documentation, samples, header files, +libraries, and tools you need to develop applications that +use 7z / LZMA / LZMA2 / XZ compression. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +LZMA2 is a LZMA based compression method. LZMA2 provides better +multithreading support for compression than LZMA and some other improvements. + +7z is a file format for data compression and file archiving. +7z is a main file format for 7-Zip compression program (www.7-zip.org). +7z format supports different compression methods: LZMA, LZMA2 and others. +7z also supports AES-256 based encryption. + +XZ is a file format for data compression that uses LZMA2 compression. +XZ format provides additional features: SHA/CRC check, filters for +improved compression ratio, splitting to blocks and streams, + + + +LICENSE +------- + +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +Some code in LZMA SDK is based on public domain code from another developers: + 1) PPMd var.H (2001): Dmitry Shkarin + 2) SHA-256: Wei Dai (Crypto++ library) + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute the +original LZMA SDK code, either in source code form or as a compiled binary, for +any purpose, commercial or non-commercial, and by any means. + +LZMA SDK code is compatible with open source licenses, for example, you can +include it to GNU GPL or GNU LGPL code. + + +LZMA SDK Contents +----------------- + + Source code: + + - C / C++ / C# / Java - LZMA compression and decompression + - C / C++ - LZMA2 compression and decompression + - C / C++ - XZ compression and decompression + - C - 7z decompression + - C++ - 7z compression and decompression + - C - small SFXs for installers (7z decompression) + - C++ - SFXs and SFXs for installers (7z decompression) + + Precomiled binaries: + + - console programs for lzma / 7z / xz compression and decompression + - SFX modules for installers. + + +UNIX/Linux version +------------------ +To compile C++ version of file->file LZMA encoding, go to directory +CPP/7zip/Bundles/LzmaCon +and call make to recompile it: + make -f makefile.gcc clean all + +In some UNIX/Linux versions you must compile LZMA with static libraries. +To compile with static libraries, you can use +LIB = -lm -static + +Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux): + + http://p7zip.sourceforge.net/ + + +Files +----- + +DOC/7zC.txt - 7z ANSI-C Decoder description +DOC/7zFormat.txt - 7z Format description +DOC/installer.txt - information about 7-Zip for installers +DOC/lzma.txt - LZMA compression description +DOC/lzma-sdk.txt - LZMA SDK description (this file) +DOC/lzma-history.txt - history of LZMA SDK +DOC/lzma-specification.txt - Specification of LZMA +DOC/Methods.txt - Compression method IDs for .7z + +bin/installer/ - example script to create installer that uses SFX module, + +bin/7zdec.exe - simplified 7z archive decoder +bin/7zr.exe - 7-Zip console program (reduced version) +bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version) +bin/lzma.exe - file->file LZMA encoder/decoder for Windows +bin/7zS2.sfx - small SFX module for installers (GUI version) +bin/7zS2con.sfx - small SFX module for installers (Console version) +bin/7zSD.sfx - SFX module for installers. + + +7zDec.exe +--------- +7zDec.exe is simplified 7z archive decoder. +It supports only LZMA, LZMA2, and PPMd methods. +7zDec decodes whole solid block from 7z archive to RAM. +The RAM consumption can be high. + + + + +Source code structure +--------------------- + + +Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption) + +C/ - C files (compression / decompression and other) + Util/ + 7z - 7z decoder program (decoding 7z files) + Lzma - LZMA program (file->file LZMA encoder/decoder). + LzmaLib - LZMA library (.DLL for Windows) + SfxSetup - small SFX module for installers + +CPP/ -- CPP files + + Common - common files for C++ projects + Windows - common files for Windows related code + + 7zip - files related to 7-Zip + + Archive - files related to archiving + + Common - common files for archive handling + 7z - 7z C++ Encoder/Decoder + + Bundles - Modules that are bundles of other modules (files) + + Alone7z - 7zr.exe: Standalone 7-Zip console program (reduced version) + Format7zExtractR - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2. + Format7zR - 7zr.dll: Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2 + LzmaCon - lzma.exe: LZMA compression/decompression + LzmaSpec - example code for LZMA Specification + SFXCon - 7zCon.sfx: Console 7z SFX module + SFXSetup - 7zS.sfx: 7z SFX module for installers + SFXWin - 7z.sfx: GUI 7z SFX module + + Common - common files for 7-Zip + + Compress - files for compression/decompression + + Crypto - files for encryption / decompression + + UI - User Interface files + + Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll + Common - Common UI files + Console - Code for console program (7z.exe) + Explorer - Some code from 7-Zip Shell extension + FileManager - Some GUI code from 7-Zip File Manager + GUI - Some GUI code from 7-Zip + + +CS/ - C# files + 7zip + Common - some common files for 7-Zip + Compress - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + LzmaAlone - file->file LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + +Java/ - Java files + SevenZip + Compression - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + + +Note: + Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code. + 7-Zip's source code can be downloaded from 7-Zip's SourceForge page: + + http://sourceforge.net/projects/sevenzip/ + + + +LZMA features +------------- + - Variable dictionary size (up to 1 GB) + - Estimated compressing speed: about 2 MB/s on 2 GHz CPU + - Estimated decompressing speed: + - 20-30 MB/s on modern 2 GHz cpu + - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC) + - Small memory requirements for decompressing (16 KB + DictionarySize) + - Small code size for decompressing: 5-8 KB + +LZMA decoder uses only integer operations and can be +implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions). + +Some critical operations that affect the speed of LZMA decompression: + 1) 32*16 bit integer multiply + 2) Mispredicted branches (penalty mostly depends from pipeline length) + 3) 32-bit shift and arithmetic operations + +The speed of LZMA decompressing mostly depends from CPU speed. +Memory speed has no big meaning. But if your CPU has small data cache, +overall weight of memory speed will slightly increase. + + +How To Use +---------- + +Using LZMA encoder/decoder executable +-------------------------------------- + +Usage: LZMA inputFile outputFile [...] + + e: encode file + + d: decode file + + b: Benchmark. There are two tests: compressing and decompressing + with LZMA method. Benchmark shows rating in MIPS (million + instructions per second). Rating value is calculated from + measured speed and it is normalized with Intel's Core 2 results. + Also Benchmark checks possible hardware errors (RAM + errors in most cases). Benchmark uses these settings: + (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. + Also you can change the number of iterations. Example for 30 iterations: + LZMA b 30 + Default number of iterations is 10. + + + + + -a{N}: set compression mode 0 = fast, 1 = normal + default: 1 (normal) + + d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) + The maximum value for dictionary size is 1 GB = 2^30 bytes. + Dictionary size is calculated as DictionarySize = 2^N bytes. + For decompressing file compressed by LZMA method with dictionary + size D = 2^N you need about D bytes of memory (RAM). + + -fb{N}: set number of fast bytes - [5, 273], default: 128 + Usually big number gives a little bit better compression ratio + and slower compression process. + + -lc{N}: set number of literal context bits - [0, 8], default: 3 + Sometimes lc=4 gives gain for big files. + + -lp{N}: set number of literal pos bits - [0, 4], default: 0 + lp switch is intended for periodical data when period is + equal 2^N. For example, for 32-bit (4 bytes) + periodical data you can use lp=2. Often it's better to set lc0, + if you change lp switch. + + -pb{N}: set number of pos bits - [0, 4], default: 2 + pb switch is intended for periodical data + when period is equal 2^N. + + -mf{MF_ID}: set Match Finder. Default: bt4. + Algorithms from hc* group doesn't provide good compression + ratio, but they often works pretty fast in combination with + fast mode (-a0). + + Memory requirements depend from dictionary size + (parameter "d" in table below). + + MF_ID Memory Description + + bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. + bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. + bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. + hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. + + -eos: write End Of Stream marker. By default LZMA doesn't write + eos marker, since LZMA decoder knows uncompressed size + stored in .lzma file header. + + -si: Read data from stdin (it will write End Of Stream marker). + -so: Write data to stdout + + +Examples: + +1) LZMA e file.bin file.lzma -d16 -lc0 + +compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K) +and 0 literal context bits. -lc0 allows to reduce memory requirements +for decompression. + + +2) LZMA e file.bin file.lzma -lc0 -lp2 + +compresses file.bin to file.lzma with settings suitable +for 32-bit periodical data (for example, ARM or MIPS code). + +3) LZMA d file.lzma file.bin + +decompresses file.lzma to file.bin. + + +Compression ratio hints +----------------------- + +Recommendations +--------------- + +To increase the compression ratio for LZMA compressing it's desirable +to have aligned data (if it's possible) and also it's desirable to locate +data in such order, where code is grouped in one place and data is +grouped in other place (it's better than such mixing: code, data, code, +data, ...). + + +Filters +------- +You can increase the compression ratio for some data types, using +special filters before compressing. For example, it's possible to +increase the compression ratio on 5-10% for code for those CPU ISAs: +x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC. + +You can find C source code of such filters in C/Bra*.* files + +You can check the compression ratio gain of these filters with such +7-Zip commands (example for ARM code): +No filter: + 7z a a1.7z a.bin -m0=lzma + +With filter for little-endian ARM code: + 7z a a2.7z a.bin -m0=arm -m1=lzma + +It works in such manner: +Compressing = Filter_encoding + LZMA_encoding +Decompressing = LZMA_decoding + Filter_decoding + +Compressing and decompressing speed of such filters is very high, +so it will not increase decompressing time too much. +Moreover, it reduces decompression time for LZMA_decoding, +since compression ratio with filtering is higher. + +These filters convert CALL (calling procedure) instructions +from relative offsets to absolute addresses, so such data becomes more +compressible. + +For some ISAs (for example, for MIPS) it's impossible to get gain from such filter. + + + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/deps/LZMA-SDK/DOC/lzma-specification.txt b/deps/LZMA-SDK/DOC/lzma-specification.txt new file mode 100644 index 000000000..b6796df75 --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-specification.txt @@ -0,0 +1,1176 @@ +LZMA specification (DRAFT version) +---------------------------------- + +Author: Igor Pavlov +Date: 2015-06-14 + +This specification defines the format of LZMA compressed data and lzma file format. + +Notation +-------- + +We use the syntax of C++ programming language. +We use the following types in C++ code: + unsigned - unsigned integer, at least 16 bits in size + int - signed integer, at least 16 bits in size + UInt64 - 64-bit unsigned integer + UInt32 - 32-bit unsigned integer + UInt16 - 16-bit unsigned integer + Byte - 8-bit unsigned integer + bool - boolean type with two possible values: false, true + + +lzma file format +================ + +The lzma file contains the raw LZMA stream and the header with related properties. + +The files in that format use ".lzma" extension. + +The lzma file format layout: + +Offset Size Description + + 0 1 LZMA model properties (lc, lp, pb) in encoded form + 1 4 Dictionary size (32-bit unsigned integer, little-endian) + 5 8 Uncompressed size (64-bit unsigned integer, little-endian) + 13 Compressed data (LZMA stream) + +LZMA properties: + + name Range Description + + lc [0, 8] the number of "literal context" bits + lp [0, 4] the number of "literal pos" bits + pb [0, 4] the number of "pos" bits +dictSize [0, 2^32 - 1] the dictionary size + +The following code encodes LZMA properties: + +void EncodeProperties(Byte *properties) +{ + properties[0] = (Byte)((pb * 5 + lp) * 9 + lc); + Set_UInt32_LittleEndian(properties + 1, dictSize); +} + +If the value of dictionary size in properties is smaller than (1 << 12), +the LZMA decoder must set the dictionary size variable to (1 << 12). + +#define LZMA_DIC_MIN (1 << 12) + + unsigned lc, pb, lp; + UInt32 dictSize; + UInt32 dictSizeInProperties; + + void DecodeProperties(const Byte *properties) + { + unsigned d = properties[0]; + if (d >= (9 * 5 * 5)) + throw "Incorrect LZMA properties"; + lc = d % 9; + d /= 9; + pb = d / 5; + lp = d % 5; + dictSizeInProperties = 0; + for (int i = 0; i < 4; i++) + dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); + dictSize = dictSizeInProperties; + if (dictSize < LZMA_DIC_MIN) + dictSize = LZMA_DIC_MIN; + } + +If "Uncompressed size" field contains ones in all 64 bits, it means that +uncompressed size is unknown and there is the "end marker" in stream, +that indicates the end of decoding point. +In opposite case, if the value from "Uncompressed size" field is not +equal to ((2^64) - 1), the LZMA stream decoding must be finished after +specified number of bytes (Uncompressed size) is decoded. And if there +is the "end marker", the LZMA decoder must read that marker also. + + +The new scheme to encode LZMA properties +---------------------------------------- + +If LZMA compression is used for some another format, it's recommended to +use a new improved scheme to encode LZMA properties. That new scheme was +used in xz format that uses the LZMA2 compression algorithm. +The LZMA2 is a new compression algorithm that is based on the LZMA algorithm. + +The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports +only reduced set of dictionary sizes: + (2 << 11), (3 << 11), + (2 << 12), (3 << 12), + ... + (2 << 30), (3 << 30), + (2 << 31) - 1 + +The dictionary size can be extracted from encoded value with the following code: + + dictSize = (p == 40) ? 0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)); + +Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of +"lc" and "lp" properties: + + if (lc + lp > 4) + throw "Unsupported properties: (lc + lp) > 4"; + +There are some advantages for LZMA decoder with such (lc + lp) value +limitation. It reduces the maximum size of tables allocated by decoder. +And it reduces the complexity of initialization procedure, that can be +important to keep high speed of decoding of big number of small LZMA streams. + +It's recommended to use that limitation (lc + lp <= 4) for any new format +that uses LZMA compression. Note that the combinations of "lc" and "lp" +parameters, where (lc + lp > 4), can provide significant improvement in +compression ratio only in some rare cases. + +The LZMA properties can be encoded into two bytes in new scheme: + +Offset Size Description + + 0 1 The dictionary size encoded with LZMA2 scheme + 1 1 LZMA model properties (lc, lp, pb) in encoded form + + +The RAM usage +============= + +The RAM usage for LZMA decoder is determined by the following parts: + +1) The Sliding Window (from 4 KiB to 4 GiB). +2) The probability model counter arrays (arrays of 16-bit variables). +3) Some additional state variables (about 10 variables of 32-bit integers). + + +The RAM usage for Sliding Window +-------------------------------- + +There are two main scenarios of decoding: + +1) The decoding of full stream to one RAM buffer. + + If we decode full LZMA stream to one output buffer in RAM, the decoder + can use that output buffer as sliding window. So the decoder doesn't + need additional buffer allocated for sliding window. + +2) The decoding to some external storage. + + If we decode LZMA stream to external storage, the decoder must allocate + the buffer for sliding window. The size of that buffer must be equal + or larger than the value of dictionary size from properties of LZMA stream. + +In this specification we describe the code for decoding to some external +storage. The optimized version of code for decoding of full stream to one +output RAM buffer can require some minor changes in code. + + +The RAM usage for the probability model counters +------------------------------------------------ + +The size of the probability model counter arrays is calculated with the +following formula: + +size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc)) + +Each probability model counter is 11-bit unsigned integer. +If we use 16-bit integer variables (2-byte integers) for these probability +model counters, the RAM usage required by probability model counter arrays +can be estimated with the following formula: + + RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc)) + +For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is + + RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB + +The maximum RAM state usage is required for decoding the stream with lp = 4 +and lc = 8: + + RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB + +If the decoder uses LZMA2's limited property condition +(lc + lp <= 4), the RAM usage will be not larger than + + RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB + + +The RAM usage for encoder +------------------------- + +There are many variants for LZMA encoding code. +These variants have different values for memory consumption. +Note that memory consumption for LZMA Encoder can not be +smaller than memory consumption of LZMA Decoder for same stream. + +The RAM usage required by modern effective implementation of +LZMA Encoder can be estimated with the following formula: + + Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize. + +But there are some modes of the encoder that require less memory. + + +LZMA Decoding +============= + +The LZMA compression algorithm uses LZ-based compression with Sliding Window +and Range Encoding as entropy coding method. + + +Sliding Window +-------------- + +LZMA uses Sliding Window compression similar to LZ77 algorithm. + +LZMA stream must be decoded to the sequence that consists +of MATCHES and LITERALS: + + - a LITERAL is a 8-bit character (one byte). + The decoder just puts that LITERAL to the uncompressed stream. + + - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair). + The decoder takes one byte exactly "DISTANCE" characters behind + current position in the uncompressed stream and puts it to + uncompressed stream. The decoder must repeat it "LENGTH" times. + +The "DISTANCE" can not be larger than dictionary size. +And the "DISTANCE" can not be larger than the number of bytes in +the uncompressed stream that were decoded before that match. + +In this specification we use cyclic buffer to implement Sliding Window +for LZMA decoder: + +class COutWindow +{ + Byte *Buf; + UInt32 Pos; + UInt32 Size; + bool IsFull; + +public: + unsigned TotalPos; + COutStream OutStream; + + COutWindow(): Buf(NULL) {} + ~COutWindow() { delete []Buf; } + + void Create(UInt32 dictSize) + { + Buf = new Byte[dictSize]; + Pos = 0; + Size = dictSize; + IsFull = false; + TotalPos = 0; + } + + void PutByte(Byte b) + { + TotalPos++; + Buf[Pos++] = b; + if (Pos == Size) + { + Pos = 0; + IsFull = true; + } + OutStream.WriteByte(b); + } + + Byte GetByte(UInt32 dist) const + { + return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; + } + + void CopyMatch(UInt32 dist, unsigned len) + { + for (; len > 0; len--) + PutByte(GetByte(dist)); + } + + bool CheckDistance(UInt32 dist) const + { + return dist <= Pos || IsFull; + } + + bool IsEmpty() const + { + return Pos == 0 && !IsFull; + } +}; + + +In another implementation it's possible to use one buffer that contains +Sliding Window and the whole data stream after uncompressing. + + +Range Decoder +------------- + +LZMA algorithm uses Range Encoding (1) as entropy coding method. + +LZMA stream contains just one very big number in big-endian encoding. +LZMA decoder uses the Range Decoder to extract a sequence of binary +symbols from that big number. + +The state of the Range Decoder: + +struct CRangeDecoder +{ + UInt32 Range; + UInt32 Code; + InputStream *InStream; + + bool Corrupted; +} + +The notes about UInt32 type for the "Range" and "Code" variables: + + It's possible to use 64-bit (unsigned or signed) integer type + for the "Range" and the "Code" variables instead of 32-bit unsigned, + but some additional code must be used to truncate the values to + low 32-bits after some operations. + + If the programming language does not support 32-bit unsigned integer type + (like in case of JAVA language), it's possible to use 32-bit signed integer, + but some code must be changed. For example, it's required to change the code + that uses comparison operations for UInt32 variables in this specification. + +The Range Decoder can be in some states that can be treated as +"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted": + + (Corrupted == false), if the Range Decoder has not detected any corruption. + (Corrupted == true), if the Range Decoder has detected some corruption. + +The reference LZMA Decoder ignores the value of the "Corrupted" variable. +So it continues to decode the stream, even if the corruption can be detected +in the Range Decoder. To provide the full compatibility with output of the +reference LZMA Decoder, another LZMA Decoder implementations must also +ignore the value of the "Corrupted" variable. + +The LZMA Encoder is required to create only such LZMA streams, that will not +lead the Range Decoder to states, where the "Corrupted" variable is set to true. + +The Range Decoder reads first 5 bytes from input stream to initialize +the state: + +bool CRangeDecoder::Init() +{ + Corrupted = false; + Range = 0xFFFFFFFF; + Code = 0; + + Byte b = InStream->ReadByte(); + + for (int i = 0; i < 4; i++) + Code = (Code << 8) | InStream->ReadByte(); + + if (b != 0 || Code == Range) + Corrupted = true; + return b == 0; +} + +The LZMA Encoder always writes ZERO in initial byte of compressed stream. +That scheme allows to simplify the code of the Range Encoder in the +LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must +stop decoding and report error. + +After the last bit of data was decoded by Range Decoder, the value of the +"Code" variable must be equal to 0. The LZMA Decoder must check it by +calling the IsFinishedOK() function: + + bool IsFinishedOK() const { return Code == 0; } + +If there is corruption in data stream, there is big probability that +the "Code" value will be not equal to 0 in the Finish() function. So that +check in the IsFinishedOK() function provides very good feature for +corruption detection. + +The value of the "Range" variable before each bit decoding can not be smaller +than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in +described range. + +#define kTopValue ((UInt32)1 << 24) + +void CRangeDecoder::Normalize() +{ + if (Range < kTopValue) + { + Range <<= 8; + Code = (Code << 8) | InStream->ReadByte(); + } +} + +Notes: if the size of the "Code" variable is larger than 32 bits, it's +required to keep only low 32 bits of the "Code" variable after the change +in Normalize() function. + +If the LZMA Stream is not corrupted, the value of the "Code" variable is +always smaller than value of the "Range" variable. +But the Range Decoder ignores some types of corruptions, so the value of +the "Code" variable can be equal or larger than value of the "Range" variable +for some "Corrupted" archives. + + +LZMA uses Range Encoding only with binary symbols of two types: + 1) binary symbols with fixed and equal probabilities (direct bits) + 2) binary symbols with predicted probabilities + +The DecodeDirectBits() function decodes the sequence of direct bits: + +UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) +{ + UInt32 res = 0; + do + { + Range >>= 1; + Code -= Range; + UInt32 t = 0 - ((UInt32)Code >> 31); + Code += Range & t; + + if (Code == Range) + Corrupted = true; + + Normalize(); + res <<= 1; + res += t + 1; + } + while (--numBits); + return res; +} + + +The Bit Decoding with Probability Model +--------------------------------------- + +The task of Bit Probability Model is to estimate probabilities of binary +symbols. And then it provides the Range Decoder with that information. +The better prediction provides better compression ratio. +The Bit Probability Model uses statistical data of previous decoded +symbols. + +That estimated probability is presented as 11-bit unsigned integer value +that represents the probability of symbol "0". + +#define kNumBitModelTotalBits 11 + +Mathematical probabilities can be presented with the following formulas: + probability(symbol_0) = prob / 2048. + probability(symbol_1) = 1 - Probability(symbol_0) = + = 1 - prob / 2048 = + = (2048 - prob) / 2048 +where the "prob" variable contains 11-bit integer probability counter. + +It's recommended to use 16-bit unsigned integer type, to store these 11-bit +probability values: + +typedef UInt16 CProb; + +Each probability value must be initialized with value ((1 << 11) / 2), +that represents the state, where probabilities of symbols 0 and 1 +are equal to 0.5: + +#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) + +The INIT_PROBS macro is used to initialize the array of CProb variables: + +#define INIT_PROBS(p) \ + { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } + + +The DecodeBit() function decodes one bit. +The LZMA decoder provides the pointer to CProb variable that contains +information about estimated probability for symbol 0 and the Range Decoder +updates that CProb variable after decoding. The Range Decoder increases +estimated probability of the symbol that was decoded: + +#define kNumMoveBits 5 + +unsigned CRangeDecoder::DecodeBit(CProb *prob) +{ + unsigned v = *prob; + UInt32 bound = (Range >> kNumBitModelTotalBits) * v; + unsigned symbol; + if (Code < bound) + { + v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; + Range = bound; + symbol = 0; + } + else + { + v -= v >> kNumMoveBits; + Code -= bound; + Range -= bound; + symbol = 1; + } + *prob = (CProb)v; + Normalize(); + return symbol; +} + + +The Binary Tree of bit model counters +------------------------------------- + +LZMA uses a tree of Bit model variables to decode symbol that needs +several bits for storing. There are two versions of such trees in LZMA: + 1) the tree that decodes bits from high bit to low bit (the normal scheme). + 2) the tree that decodes bits from low bit to high bit (the reverse scheme). + +Each binary tree structure supports different size of decoded symbol +(the size of binary sequence that contains value of symbol). +If that size of decoded symbol is "NumBits" bits, the tree structure +uses the array of (2 << NumBits) counters of CProb type. +But only ((2 << NumBits) - 1) items are used by encoder and decoder. +The first item (the item with index equal to 0) in array is unused. +That scheme with unused array's item allows to simplify the code. + +unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) +{ + unsigned m = 1; + unsigned symbol = 0; + for (unsigned i = 0; i < numBits; i++) + { + unsigned bit = rc->DecodeBit(&probs[m]); + m <<= 1; + m += bit; + symbol |= (bit << i); + } + return symbol; +} + +template +class CBitTreeDecoder +{ + CProb Probs[(unsigned)1 << NumBits]; + +public: + + void Init() + { + INIT_PROBS(Probs); + } + + unsigned Decode(CRangeDecoder *rc) + { + unsigned m = 1; + for (unsigned i = 0; i < NumBits; i++) + m = (m << 1) + rc->DecodeBit(&Probs[m]); + return m - ((unsigned)1 << NumBits); + } + + unsigned ReverseDecode(CRangeDecoder *rc) + { + return BitTreeReverseDecode(Probs, NumBits, rc); + } +}; + + +LZ part of LZMA +--------------- + +LZ part of LZMA describes details about the decoding of MATCHES and LITERALS. + + +The Literal Decoding +-------------------- + +The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where +each table contains 0x300 CProb values: + + CProb *LitProbs; + + void CreateLiterals() + { + LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; + } + + void InitLiterals() + { + UInt32 num = (UInt32)0x300 << (lc + lp); + for (UInt32 i = 0; i < num; i++) + LitProbs[i] = PROB_INIT_VAL; + } + +To select the table for decoding it uses the context that consists of +(lc) high bits from previous literal and (lp) low bits from value that +represents current position in outputStream. + +If (State > 7), the Literal Decoder also uses "matchByte" that represents +the byte in OutputStream at position the is the DISTANCE bytes before +current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair +of latest decoded match. + +The following code decodes one literal and puts it to Sliding Window buffer: + + void DecodeLiteral(unsigned state, UInt32 rep0) + { + unsigned prevByte = 0; + if (!OutWindow.IsEmpty()) + prevByte = OutWindow.GetByte(1); + + unsigned symbol = 1; + unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); + CProb *probs = &LitProbs[(UInt32)0x300 * litState]; + + if (state >= 7) + { + unsigned matchByte = OutWindow.GetByte(rep0 + 1); + do + { + unsigned matchBit = (matchByte >> 7) & 1; + matchByte <<= 1; + unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); + symbol = (symbol << 1) | bit; + if (matchBit != bit) + break; + } + while (symbol < 0x100); + } + while (symbol < 0x100) + symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); + OutWindow.PutByte((Byte)(symbol - 0x100)); + } + + +The match length decoding +------------------------- + +The match length decoder returns normalized (zero-based value) +length of match. That value can be converted to real length of the match +with the following code: + +#define kMatchMinLen 2 + + matchLen = len + kMatchMinLen; + +The match length decoder can return the values from 0 to 271. +And the corresponded real match length values can be in the range +from 2 to 273. + +The following scheme is used for the match length encoding: + + Binary encoding Binary Tree structure Zero-based match length + sequence (binary + decimal): + + 0 xxx LowCoder[posState] xxx + 1 0 yyy MidCoder[posState] yyy + 8 + 1 1 zzzzzzzz HighCoder zzzzzzzz + 16 + +LZMA uses bit model variable "Choice" to decode the first selection bit. + +If the first selection bit is equal to 0, the decoder uses binary tree + LowCoder[posState] to decode 3-bit zero-based match length (xxx). + +If the first selection bit is equal to 1, the decoder uses bit model + variable "Choice2" to decode the second selection bit. + + If the second selection bit is equal to 0, the decoder uses binary tree + MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match + length is equal to (yyy + 8). + + If the second selection bit is equal to 1, the decoder uses binary tree + HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based + match length is equal to (zzzzzzzz + 16). + +LZMA uses "posState" value as context to select the binary tree +from LowCoder and MidCoder binary tree arrays: + + unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); + +The full code of the length decoder: + +class CLenDecoder +{ + CProb Choice; + CProb Choice2; + CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<8> HighCoder; + +public: + + void Init() + { + Choice = PROB_INIT_VAL; + Choice2 = PROB_INIT_VAL; + HighCoder.Init(); + for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) + { + LowCoder[i].Init(); + MidCoder[i].Init(); + } + } + + unsigned Decode(CRangeDecoder *rc, unsigned posState) + { + if (rc->DecodeBit(&Choice) == 0) + return LowCoder[posState].Decode(rc); + if (rc->DecodeBit(&Choice2) == 0) + return 8 + MidCoder[posState].Decode(rc); + return 16 + HighCoder.Decode(rc); + } +}; + +The LZMA decoder uses two instances of CLenDecoder class. +The first instance is for the matches of "Simple Match" type, +and the second instance is for the matches of "Rep Match" type: + + CLenDecoder LenDecoder; + CLenDecoder RepLenDecoder; + + +The match distance decoding +--------------------------- + +LZMA supports dictionary sizes up to 4 GiB minus 1. +The value of match distance (decoded by distance decoder) can be +from 1 to 2^32. But the distance value that is equal to 2^32 is used to +indicate the "End of stream" marker. So real largest match distance +that is used for LZ-window match is (2^32 - 1). + +LZMA uses normalized match length (zero-based length) +to calculate the context state "lenState" do decode the distance value: + +#define kNumLenToPosStates 4 + + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + +The distance decoder returns the "dist" value that is zero-based value +of match distance. The real match distance can be calculated with the +following code: + + matchDistance = dist + 1; + +The state of the distance decoder and the initialization code: + + #define kEndPosModelIndex 14 + #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + #define kNumAlignBits 4 + + CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; + CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; + CBitTreeDecoder AlignDecoder; + + void InitDist() + { + for (unsigned i = 0; i < kNumLenToPosStates; i++) + PosSlotDecoder[i].Init(); + AlignDecoder.Init(); + INIT_PROBS(PosDecoders); + } + +At first stage the distance decoder decodes 6-bit "posSlot" value with bit +tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different +"posSlot" values. + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + +The encoding scheme for distance value is shown in the following table: + +posSlot (decimal) / + zero-based distance (binary) + 0 0 + 1 1 + 2 10 + 3 11 + + 4 10 x + 5 11 x + 6 10 xx + 7 11 xx + 8 10 xxx + 9 11 xxx +10 10 xxxx +11 11 xxxx +12 10 xxxxx +13 11 xxxxx + +14 10 yy zzzz +15 11 yy zzzz +16 10 yyy zzzz +17 11 yyy zzzz +... +62 10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz +63 11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz + +where + "x ... x" means the sequence of binary symbols encoded with binary tree and + "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13. + "y" means direct bit encoded with range coder. + "zzzz" means the sequence of four binary symbols encoded with binary + tree with "Reverse" scheme, where one common binary tree "AlignDecoder" + is used for all posSlot values. + +If (posSlot < 4), the "dist" value is equal to posSlot value. + +If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of + the high bits of "dist" value and the number of the low bits. + + If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders. + (one separated bit tree decoder per one posSlot value) and "Reverse" scheme. + In this implementation we use one CProb array "PosDecoders" that contains + all CProb variables for all these bit decoders. + + if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct + bits from RangeDecoder and the low 4 bits are decoded with a bit tree + decoder "AlignDecoder" with "Reverse" scheme. + +The code to decode zero-based match distance: + + unsigned DecodeDistance(unsigned len) + { + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + if (posSlot < 4) + return posSlot; + + unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); + UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); + if (posSlot < kEndPosModelIndex) + dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); + else + { + dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; + dist += AlignDecoder.ReverseDecode(&RangeDec); + } + return dist; + } + + + +LZMA Decoding modes +------------------- + +There are 2 types of LZMA streams: + +1) The stream with "End of stream" marker. +2) The stream without "End of stream" marker. + +And the LZMA Decoder supports 3 modes of decoding: + +1) The unpack size is undefined. The LZMA decoder stops decoding after + getting "End of stream" marker. + The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = false + unpackSize contains any value + +2) The unpack size is defined and LZMA decoder supports both variants, + where the stream can contain "End of stream" marker or the stream is + finished without "End of stream" marker. The LZMA decoder must detect + any of these situations. + The input variables for that case: + + markerIsMandatory = false + unpackSizeDefined = true + unpackSize contains unpack size + +3) The unpack size is defined and the LZMA stream must contain + "End of stream" marker + The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = true + unpackSize contains unpack size + + +The main loop of decoder +------------------------ + +The main loop of LZMA decoder: + +Initialize the LZMA state. +loop +{ + // begin of loop + Check "end of stream" conditions. + Decode Type of MATCH / LITERAL. + If it's LITERAL, decode LITERAL value and put the LITERAL to Window. + If it's MATCH, decode the length of match and the match distance. + Check error conditions, check end of stream conditions and copy + the sequence of match bytes from sliding window to current position + in window. + Go to begin of loop +} + +The reference implementation of LZMA decoder uses "unpackSize" variable +to keep the number of remaining bytes in output stream. So it reduces +"unpackSize" value after each decoded LITERAL or MATCH. + +The following code contains the "end of stream" condition check at the start +of the loop: + + if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) + if (RangeDec.IsFinishedOK()) + return LZMA_RES_FINISHED_WITHOUT_MARKER; + +LZMA uses three types of matches: + +1) "Simple Match" - the match with distance value encoded with bit models. + +2) "Rep Match" - the match that uses the distance from distance + history table. + +3) "Short Rep Match" - the match of single byte length, that uses the latest + distance from distance history table. + +The LZMA decoder keeps the history of latest 4 match distances that were used +by decoder. That set of 4 variables contains zero-based match distances and +these variables are initialized with zero values: + + UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; + +The LZMA decoder uses binary model variables to select type of MATCH or LITERAL: + +#define kNumStates 12 +#define kNumPosBitsMax 4 + + CProb IsMatch[kNumStates << kNumPosBitsMax]; + CProb IsRep[kNumStates]; + CProb IsRepG0[kNumStates]; + CProb IsRepG1[kNumStates]; + CProb IsRepG2[kNumStates]; + CProb IsRep0Long[kNumStates << kNumPosBitsMax]; + +The decoder uses "state" variable value to select exact variable +from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays. +The "state" variable can get the value from 0 to 11. +Initial value for "state" variable is zero: + + unsigned state = 0; + +The "state" variable is updated after each LITERAL or MATCH with one of the +following functions: + +unsigned UpdateState_Literal(unsigned state) +{ + if (state < 4) return 0; + else if (state < 10) return state - 3; + else return state - 6; +} +unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } +unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } +unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } + +The decoder calculates "state2" variable value to select exact variable from +"IsMatch" and "IsRep0Long" arrays: + +unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); +unsigned state2 = (state << kNumPosBitsMax) + posState; + +The decoder uses the following code flow scheme to select exact +type of LITERAL or MATCH: + +IsMatch[state2] decode + 0 - the Literal + 1 - the Match + IsRep[state] decode + 0 - Simple Match + 1 - Rep Match + IsRepG0[state] decode + 0 - the distance is rep0 + IsRep0Long[state2] decode + 0 - Short Rep Match + 1 - Rep Match 0 + 1 - + IsRepG1[state] decode + 0 - Rep Match 1 + 1 - + IsRepG2[state] decode + 0 - Rep Match 2 + 1 - Rep Match 3 + + +LITERAL symbol +-------------- +If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Then it decodes literal value and puts it to sliding window: + + DecodeLiteral(state, rep0); + +Then the decoder must update the "state" value and "unpackSize" value; + + state = UpdateState_Literal(state); + unpackSize--; + +Then the decoder must go to the begin of main loop to decode next Match or Literal. + + +Simple Match +------------ + +If the value "1" was decoded with IsMatch[state2] decoding, +we have the "Simple Match" type. + +The distance history table is updated with the following scheme: + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + +The zero-based length is decoded with "LenDecoder": + + len = LenDecoder.Decode(&RangeDec, posState); + +The state is update with UpdateState_Match function: + + state = UpdateState_Match(state); + +and the new "rep0" value is decoded with DecodeDistance: + + rep0 = DecodeDistance(len); + +That "rep0" will be used as zero-based distance for current match. + +If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have +"End of stream" marker, so we can stop decoding and check finishing +condition in Range Decoder: + + if (rep0 == 0xFFFFFFFF) + return RangeDec.IsFinishedOK() ? + LZMA_RES_FINISHED_WITH_MARKER : + LZMA_RES_ERROR; + +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must check that "rep0" value is not larger than dictionary size +and is not larger than the number of already decoded bytes: + + if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) + return LZMA_RES_ERROR; + +Then the decoder must copy match bytes as described in +"The match symbols copying" section. + + +Rep Match +--------- + +If the LZMA decoder has decoded the value "1" with IsRep[state] variable, +we have "Rep Match" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must return error, if the LZ window is empty: + + if (OutWindow.IsEmpty()) + return LZMA_RES_ERROR; + +If the match type is "Rep Match", the decoder uses one of the 4 variables of +distance history table to get the value of distance for current match. +And there are 4 corresponding ways of decoding flow. + +The decoder updates the distance history with the following scheme +depending from type of match: + +- "Rep Match 0" or "Short Rep Match": + ; LZMA doesn't update the distance history + +- "Rep Match 1": + UInt32 dist = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 2": + UInt32 dist = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 3": + UInt32 dist = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long", +"IsRepG1", "IsRepG2". + +If the subtype is "Short Rep Match", the decoder updates the state, puts +the one byte from window to current position in window and goes to next +MATCH/LITERAL symbol (the begin of main loop): + + state = UpdateState_ShortRep(state); + OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); + unpackSize--; + continue; + +In other cases (Rep Match 0/1/2/3), it decodes the zero-based +length of match with "RepLenDecoder" decoder: + + len = RepLenDecoder.Decode(&RangeDec, posState); + +Then it updates the state: + + state = UpdateState_Rep(state); + +Then the decoder must copy match bytes as described in +"The Match symbols copying" section. + + +The match symbols copying +------------------------- + +If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must +copy the sequence of bytes with calculated match distance and match length. +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + len += kMatchMinLen; + bool isError = false; + if (unpackSizeDefined && unpackSize < len) + { + len = (unsigned)unpackSize; + isError = true; + } + OutWindow.CopyMatch(rep0 + 1, len); + unpackSize -= len; + if (isError) + return LZMA_RES_ERROR; + +Then the decoder must go to the begin of main loop to decode next MATCH or LITERAL. + + + +NOTES +----- + +This specification doesn't describe the variant of decoder implementation +that supports partial decoding. Such partial decoding case can require some +changes in "end of stream" condition checks code. Also such code +can use additional status codes, returned by decoder. + +This specification uses C++ code with templates to simplify describing. +The optimized version of LZMA decoder doesn't need templates. +Such optimized version can use just two arrays of CProb variables: + 1) The dynamic array of CProb variables allocated for the Literal Decoder. + 2) The one common array that contains all other CProb variables. + + +References: + +1. G. N. N. Martin, Range encoding: an algorithm for removing redundancy + from a digitized message, Video & Data Recording Conference, + Southampton, UK, July 24-27, 1979. diff --git a/deps/LZMA-SDK/DOC/lzma.txt b/deps/LZMA-SDK/DOC/lzma.txt new file mode 100644 index 000000000..1f92142ea --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma.txt @@ -0,0 +1,328 @@ +LZMA compression +---------------- +Version: 9.35 + +This file describes LZMA encoding and decoding functions written in C language. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK) + +Also you can look source code for LZMA encoding and decoding: + C/Util/Lzma/LzmaUtil.c + + +LZMA compressed file format +--------------------------- +Offset Size Description + 0 1 Special LZMA properties (lc,lp, pb in encoded form) + 1 4 Dictionary size (little endian) + 5 8 Uncompressed size (little endian). -1 means unknown size + 13 Compressed data + + + +ANSI-C LZMA Decoder +~~~~~~~~~~~~~~~~~~~ + +Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58. +If you want to use old interfaces you can download previous version of LZMA SDK +from sourceforge.net site. + +To use ANSI-C LZMA Decoder you need the following files: +1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +Memory requirements for LZMA decoding +------------------------------------- + +Stack usage of LZMA decoding function for local variables is not +larger than 200-400 bytes. + +LZMA Decoder uses dictionary buffer and internal state structure. +Internal state structure consumes + state_size = (4 + (1.5 << (lc + lp))) KB +by default (lc=3, lp=0), state_size = 16 KB. + + +How To decompress data +---------------------- + +LZMA Decoder (ANSI-C version) now supports 2 interfaces: +1) Single-call Decompressing +2) Multi-call State Decompressing (zlib-like interface) + +You must use external allocator: +Example: +void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); } +void SzFree(void *p, void *address) { p = p; free(address); } +ISzAlloc alloc = { SzAlloc, SzFree }; + +You can use p = p; operator to disable compiler warnings. + + +Single-call Decompressing +------------------------- +When to use: RAM->RAM decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h +Compile defines: no defines +Memory Requirements: + - Input buffer: compressed size + - Output buffer: uncompressed size + - LZMA Internal Structures: state_size (16 KB for default settings) + +Interface: + int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + propData - LZMA properties (5 bytes) + propSize - size of propData buffer (5 bytes) + finishMode - It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + You can use LZMA_FINISH_END, when you know that + current output buffer covers last bytes of stream. + alloc - Memory allocator. + + Out: + destLen - processed output size + srcLen - processed input size + + Output: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + + If LZMA decoder sees end_marker before reaching output limit, it returns OK result, + and output value of destLen will be less than output buffer size limit. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + + +Multi-call State Decompressing (zlib-like interface) +---------------------------------------------------- + +When to use: file->file decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h + +Memory Requirements: + - Buffer for input stream: any size (for example, 16 KB) + - Buffer for output stream: any size (for example, 16 KB) + - LZMA Internal Structures: state_size (16 KB for default settings) + - LZMA dictionary (dictionary size is encoded in LZMA properties header) + +1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header: + unsigned char header[LZMA_PROPS_SIZE + 8]; + ReadFile(inFile, header, sizeof(header) + +2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties + + CLzmaDec state; + LzmaDec_Constr(&state); + res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc); + if (res != SZ_OK) + return res; + +3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop + + LzmaDec_Init(&state); + for (;;) + { + ... + int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode); + ... + } + + +4) Free all allocated structures + LzmaDec_Free(&state, &g_Alloc); + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +How To compress data +-------------------- + +Compile files: + 7zTypes.h + Threads.h + LzmaEnc.h + LzmaEnc.c + LzFind.h + LzFind.c + LzFindMt.h + LzFindMt.c + LzHash.h + +Memory Requirements: + - (dictSize * 11.5 + 6 MB) + state_size + +Lzma Encoder can use two memory allocators: +1) alloc - for small arrays. +2) allocBig - for big arrays. + +For example, you can use Large RAM Pages (2 MB) in allocBig allocator for +better compression speed. Note that Windows has bad implementation for +Large RAM Pages. +It's OK to use same allocator for alloc and allocBig. + + +Single-call Compression with callbacks +-------------------------------------- + +Look example code: + C/Util/Lzma/LzmaUtil.c + +When to use: file->file compressing + +1) you must implement callback structures for interfaces: +ISeqInStream +ISeqOutStream +ICompressProgress +ISzAlloc + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + + CFileSeqInStream inStream; + CFileSeqOutStream outStream; + + inStream.funcTable.Read = MyRead; + inStream.file = inFile; + outStream.funcTable.Write = MyWrite; + outStream.file = outFile; + + +2) Create CLzmaEncHandle object; + + CLzmaEncHandle enc; + + enc = LzmaEnc_Create(&g_Alloc); + if (enc == 0) + return SZ_ERROR_MEM; + + +3) initialize CLzmaEncProps properties; + + LzmaEncProps_Init(&props); + + Then you can change some properties in that structure. + +4) Send LZMA properties to LZMA Encoder + + res = LzmaEnc_SetProps(enc, &props); + +5) Write encoded properties to header + + Byte header[LZMA_PROPS_SIZE + 8]; + size_t headerSize = LZMA_PROPS_SIZE; + UInt64 fileSize; + int i; + + res = LzmaEnc_WriteProperties(enc, header, &headerSize); + fileSize = MyGetFileLength(inFile); + for (i = 0; i < 8; i++) + header[headerSize++] = (Byte)(fileSize >> (8 * i)); + MyWriteFileAndCheck(outFile, header, headerSize) + +6) Call encoding function: + res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, + NULL, &g_Alloc, &g_Alloc); + +7) Destroy LZMA Encoder Object + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + + +If callback function return some error code, LzmaEnc_Encode also returns that code +or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. + + +Single-call RAM->RAM Compression +-------------------------------- + +Single-call RAM->RAM Compression is similar to Compression with callbacks, +but you provide pointers to buffers instead of pointers to stream callbacks: + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + + + +Defines +------- + +_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code. + +_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for + some structures will be doubled in that case. + +_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit. + +_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type. + + +_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder. + + +C++ LZMA Encoder/Decoder +~~~~~~~~~~~~~~~~~~~~~~~~ +C++ LZMA code use COM-like interfaces. So if you want to use it, +you can study basics of COM/OLE. +C++ LZMA code is just wrapper over ANSI-C code. + + +C++ Notes +~~~~~~~~~~~~~~~~~~~~~~~~ +If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling), +you must check that you correctly work with "new" operator. +7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator. +So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator: +operator new(size_t size) +{ + void *p = ::malloc(size); + if (p == 0) + throw CNewException(); + return p; +} +If you use MSCV that throws exception for "new" operator, you can compile without +"NewHandler.cpp". So standard exception will be used. Actually some code of +7-Zip catches any exception in internal code and converts it to HRESULT code. +So you don't need to catch CNewException, if you call COM interfaces of 7-Zip. + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/deps/unrar/acknow.txt b/deps/unrar/acknow.txt index a68b67277..60a772fa1 100644 --- a/deps/unrar/acknow.txt +++ b/deps/unrar/acknow.txt @@ -51,7 +51,7 @@ * RAR uses CRC32 function based on Intel Slicing-by-8 algorithm. Original Intel Slicing-by-8 code is available here: - http://sourceforge.net/projects/slicing-by-8/ + https://sourceforge.net/projects/slicing-by-8/ Original Intel Slicing-by-8 code is licensed under BSD License available at http://www.opensource.org/licenses/bsd-license.html diff --git a/deps/unrar/arcread.cpp b/deps/unrar/arcread.cpp index b0cf39f36..d1df6c041 100644 --- a/deps/unrar/arcread.cpp +++ b/deps/unrar/arcread.cpp @@ -785,7 +785,7 @@ size_t Archive::ReadHeader50() case HEAD_SERVICE: { FileHeader *hd=ShortBlock.HeaderType==HEAD_FILE ? &FileHead:&SubHead; - hd->Reset(); + hd->Reset(); // Clear hash, time fields and other stuff like flags. *(BaseBlock *)hd=ShortBlock; bool FileBlock=ShortBlock.HeaderType==HEAD_FILE; diff --git a/deps/unrar/cmddata.cpp b/deps/unrar/cmddata.cpp index 74e565a23..455a07b2c 100644 --- a/deps/unrar/cmddata.cpp +++ b/deps/unrar/cmddata.cpp @@ -56,7 +56,6 @@ void CommandData::ParseCommandLine(bool Preprocess,int argc, char *argv[]) // In Windows we may prefer to implement our own command line parser // to avoid replacing \" by " in standard parser. Such replacing corrupts // destination paths like "dest path\" in extraction commands. - // Also our own parser is Unicode compatible. const wchar *CmdLine=GetCommandLine(); wchar *Par; @@ -288,7 +287,10 @@ void CommandData::ProcessSwitch(const wchar *Switch) AppendArcNameToPath=APPENDARCNAME_DESTPATH; else if (Switch[2]=='1') - AppendArcNameToPath=APPENDARCNAME_OWNDIR; + AppendArcNameToPath=APPENDARCNAME_OWNSUBDIR; + else + if (Switch[2]=='2') + AppendArcNameToPath=APPENDARCNAME_OWNDIR; break; #ifndef SFX_MODULE case 'G': @@ -436,9 +438,9 @@ void CommandData::ProcessSwitch(const wchar *Switch) wcsncpyz(EmailTo,Switch[4]!=0 ? Switch+4:L"@",ASIZE(EmailTo)); break; } - if (wcsicomp(Switch+1,L"M")==0) + if (wcsicomp(Switch+1,L"M")==0) // For compatibility with pre-WinRAR 6.0 -im syntax. Replaced with -idv. { - MoreInfo=true; + VerboseOutput=true; break; } if (wcsicomp(Switch+1,L"NUL")==0) @@ -465,6 +467,12 @@ void CommandData::ProcessSwitch(const wchar *Switch) case 'P': DisablePercentage=true; break; + case 'N': + DisableNames=true; + break; + case 'V': + VerboseOutput=true; + break; } break; } @@ -539,7 +547,6 @@ void CommandData::ProcessSwitch(const wchar *Switch) case 'D': Type=FILTER_DELTA; break; case 'A': Type=FILTER_AUDIO; break; case 'C': Type=FILTER_RGB; break; - case 'I': Type=FILTER_ITANIUM; break; case 'R': Type=FILTER_ARM; break; } if (*Str=='+' || *Str=='-') diff --git a/deps/unrar/cmddata.hpp b/deps/unrar/cmddata.hpp index 0c6d14893..719b4007e 100644 --- a/deps/unrar/cmddata.hpp +++ b/deps/unrar/cmddata.hpp @@ -11,7 +11,6 @@ enum IS_PROCESS_FILE_FLAGS {IPFF_EXCLUDE_PARENT=1}; class CommandData:public RAROptions { private: - void ProcessSwitchesString(const wchar *Str); void ProcessSwitch(const wchar *Switch); void BadSwitch(const wchar *Switch); uint GetExclAttr(const wchar *Str,bool &Dir); @@ -34,6 +33,7 @@ class CommandData:public RAROptions void ParseEnvVar(); void ReadConfig(); void PreprocessArg(const wchar *Arg); + void ProcessSwitchesString(const wchar *Str); void OutTitle(); void OutHelp(RAR_EXIT ExitCode); bool IsSwitch(int Ch); diff --git a/deps/unrar/consio.cpp b/deps/unrar/consio.cpp index 196066ec9..fedd5c05c 100644 --- a/deps/unrar/consio.cpp +++ b/deps/unrar/consio.cpp @@ -164,7 +164,7 @@ static void GetPasswordText(wchar *Str,uint MaxLength) SetConsoleMode(hConIn,ConInMode); SetConsoleMode(hConOut,ConOutMode); #else - char StrA[MAXPASSWORD]; + char StrA[MAXPASSWORD*4]; // "*4" for multibyte UTF-8 characters. #if defined(_EMX) || defined (__VMS) fgets(StrA,ASIZE(StrA)-1,stdin); #elif defined(__sun) @@ -248,6 +248,12 @@ bool getwstr(wchar *str,size_t n) ErrHandler.Exit(RARX_USERBREAK); } StrA[ReadSize]=0; + + // We expect ANSI encoding here, but "echo text|rar ..." to pipe to RAR, + // such as send passwords, we get OEM encoding by default, unless we + // use "chcp" in console. But we avoid OEM to ANSI conversion, + // because we also want to handle ANSI files redirection correctly, + // like "rar ... < ansifile.txt". CharToWide(&StrA[0],str,n); cleandata(&StrA[0],StrA.Size()); // We can use this function to enter passwords. } @@ -305,7 +311,7 @@ int Ask(const wchar *AskStr) for (int I=0;I4 ? L"\n":L" "):L", "); + eprintf(I==0 ? (NumItems>3 ? L"\n":L" "):L", "); int KeyPos=ItemKeyPos[I]; for (int J=0;J VS_VERSION_INFO VERSIONINFO -FILEVERSION 5, 91, 100, 3470 -PRODUCTVERSION 5, 91, 100, 3470 +FILEVERSION 6, 1, 100, 3756 +PRODUCTVERSION 6, 1, 100, 3756 FILEOS VOS__WINDOWS32 FILETYPE VFT_APP { @@ -14,9 +14,9 @@ FILETYPE VFT_APP VALUE "CompanyName", "Alexander Roshal\0" VALUE "ProductName", "RAR decompression library\0" VALUE "FileDescription", "RAR decompression library\0" - VALUE "FileVersion", "5.91.0\0" - VALUE "ProductVersion", "5.91.0\0" - VALUE "LegalCopyright", "Copyright © Alexander Roshal 1993-2020\0" + VALUE "FileVersion", "6.1.0\0" + VALUE "ProductVersion", "6.1.0\0" + VALUE "LegalCopyright", "Copyright © Alexander Roshal 1993-2021\0" VALUE "OriginalFilename", "Unrar.dll\0" } } diff --git a/deps/unrar/errhnd.cpp b/deps/unrar/errhnd.cpp index c86d17606..18e91973e 100644 --- a/deps/unrar/errhnd.cpp +++ b/deps/unrar/errhnd.cpp @@ -15,6 +15,7 @@ void ErrorHandler::Clean() UserBreak=false; MainExit=false; DisableShutdown=false; + ReadErrIgnoreAll=false; } @@ -56,24 +57,34 @@ void ErrorHandler::ReadError(const wchar *FileName) ReadErrorMsg(FileName); #endif #if !defined(SILENT) || defined(RARDLL) - Exit(RARX_FATAL); + Exit(RARX_READ); #endif } -bool ErrorHandler::AskRepeatRead(const wchar *FileName) +void ErrorHandler::AskRepeatRead(const wchar *FileName,bool &Ignore,bool &Retry,bool &Quit) { + SetErrorCode(RARX_READ); #if !defined(SILENT) && !defined(SFX_MODULE) if (!Silent) { + uiMsg(UIERROR_FILEREAD,UINULL,FileName); SysErrMsg(); - bool Repeat=uiAskRepeatRead(FileName); - if (!Repeat) // Disable shutdown if user pressed Cancel in error dialog. - DisableShutdown=true; - return Repeat; + if (ReadErrIgnoreAll) + Ignore=true; + else + { + bool All=false; + uiAskRepeatRead(FileName,Ignore,All,Retry,Quit); + if (All) + ReadErrIgnoreAll=Ignore=true; + if (Quit) // Disable shutdown if user select Quit in read error prompt. + DisableShutdown=true; + } + return; } #endif - return false; + Ignore=true; // Saving the file part for -y or -inul or "Ignore all" choice. } @@ -189,7 +200,7 @@ void ErrorHandler::ReadErrorMsg(const wchar *ArcName,const wchar *FileName) { uiMsg(UIERROR_FILEREAD,ArcName,FileName); SysErrMsg(); - SetErrorCode(RARX_FATAL); + SetErrorCode(RARX_READ); } diff --git a/deps/unrar/errhnd.hpp b/deps/unrar/errhnd.hpp index 3455dac6b..06f4f616f 100644 --- a/deps/unrar/errhnd.hpp +++ b/deps/unrar/errhnd.hpp @@ -15,9 +15,11 @@ enum RAR_EXIT // RAR exit code. RARX_CREATE = 9, RARX_NOFILES = 10, RARX_BADPWD = 11, + RARX_READ = 12, RARX_USERBREAK = 255 }; + class ErrorHandler { private: @@ -26,6 +28,7 @@ class ErrorHandler bool EnableBreak; bool Silent; bool DisableShutdown; // Shutdown is not suitable after last error. + bool ReadErrIgnoreAll; public: ErrorHandler(); void Clean(); @@ -33,7 +36,7 @@ class ErrorHandler void OpenError(const wchar *FileName); void CloseError(const wchar *FileName); void ReadError(const wchar *FileName); - bool AskRepeatRead(const wchar *FileName); + void AskRepeatRead(const wchar *FileName,bool &Ignore,bool &Retry,bool &Quit); void WriteError(const wchar *ArcName,const wchar *FileName); void WriteErrorFAT(const wchar *FileName); bool AskRepeatWrite(const wchar *FileName,bool DiskFull); diff --git a/deps/unrar/extinfo.hpp b/deps/unrar/extinfo.hpp index 2b0005daa..f3c7511b4 100644 --- a/deps/unrar/extinfo.hpp +++ b/deps/unrar/extinfo.hpp @@ -7,7 +7,7 @@ bool ExtractSymlink(CommandData *Cmd,ComprDataIO &DataIO,Archive &Arc,const wcha void SetUnixOwner(Archive &Arc,const wchar *FileName); #endif -bool ExtractHardlink(wchar *NameNew,wchar *NameExisting,size_t NameExistingSize); +bool ExtractHardlink(CommandData *Cmd,wchar *NameNew,wchar *NameExisting,size_t NameExistingSize); void GetStreamNameNTFS(Archive &Arc,wchar *StreamName,size_t MaxSize); diff --git a/deps/unrar/extract.cpp b/deps/unrar/extract.cpp index 76ee2d4a7..dc824aa97 100644 --- a/deps/unrar/extract.cpp +++ b/deps/unrar/extract.cpp @@ -42,6 +42,7 @@ void CmdExtract::DoExtract() Cmd->Password.Clean(); // Clean user entered password before processing next archive. ReconstructDone=false; // Must be reset here, not in ExtractArchiveInit(). + UseExactVolName=false; // Must be reset here, not in ExtractArchiveInit(). while (true) { EXTRACT_ARC_CODE Code=ExtractArchive(); @@ -140,7 +141,7 @@ EXTRACT_ARC_CODE CmdExtract::ExtractArchive() return EXTRACT_ARC_NEXT; #ifndef SFX_MODULE - if (Arc.Volume && !Arc.FirstVolume) + if (Arc.Volume && !Arc.FirstVolume && !UseExactVolName) { wchar FirstVolName[NM]; VolNameToFirstName(ArcName,FirstVolName,ASIZE(FirstVolName),Arc.NewNumbering); @@ -158,6 +159,16 @@ EXTRACT_ARC_CODE CmdExtract::ExtractArchive() if (Arc.Volume) { +#ifndef SFX_MODULE + // Try to speed up extraction for independent solid volumes by starting + // extraction from non-first volume if we can. + if (!UseExactVolName && Arc.Solid && DetectStartVolume(Arc.FileName,Arc.NewNumbering)) + { + UseExactVolName=true; + return EXTRACT_ARC_REPEAT; + } +#endif + // Calculate the total size of all accessible volumes. // This size is necessary to display the correct total progress indicator. @@ -318,11 +329,11 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) Arc.ConvertAttributes(); #if !defined(SFX_MODULE) && !defined(RARDLL) - if (Arc.FileHead.SplitBefore && FirstFile) + if (Arc.FileHead.SplitBefore && FirstFile && !UseExactVolName) { wchar CurVolName[NM]; wcsncpyz(CurVolName,ArcName,ASIZE(CurVolName)); - VolNameToFirstName(ArcName,ArcName,ASIZE(ArcName),Arc.NewNumbering); + GetFirstVolIfFullSet(ArcName,Arc.NewNumbering,ArcName,ASIZE(ArcName)); if (wcsicomp(ArcName,CurVolName)!=0 && FileExist(ArcName)) { @@ -575,7 +586,7 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) TotalFileCount++; } FileCount++; - if (Command!='I') + if (Command!='I' && !Cmd->DisableNames) if (SkipSolid) mprintf(St(MExtrSkipFile),ArcFileName); else @@ -594,8 +605,10 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) mprintf(St(MExtrFile),DestFileName); break; } - if (!Cmd->DisablePercentage) + if (!Cmd->DisablePercentage && !Cmd->DisableNames) mprintf(L" "); + if (Cmd->DisableNames) + uiEolAfterMsg(); // Avoid erasing preceding messages by percentage indicator in -idn mode. DataIO.CurUnpRead=0; DataIO.CurUnpWrite=0; @@ -641,7 +654,7 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) ExtrPrepareName(Arc,Arc.FileHead.RedirName,NameExisting,ASIZE(NameExisting)); if (FileCreateMode && *NameExisting!=0) // *NameExisting can be 0 in case of excessive -ap switch. if (Type==FSREDIR_HARDLINK) - LinkSuccess=ExtractHardlink(DestFileName,NameExisting,ASIZE(NameExisting)); + LinkSuccess=ExtractHardlink(Cmd,DestFileName,NameExisting,ASIZE(NameExisting)); else LinkSuccess=ExtractFileCopy(CurFile,Arc.FileName,DestFileName,NameExisting,ASIZE(NameExisting)); } @@ -653,7 +666,7 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) } else { - uiMsg(UIERROR_UNKNOWNEXTRA, Arc.FileName, DestFileName); + uiMsg(UIERROR_UNKNOWNEXTRA,Arc.FileName,DestFileName); LinkSuccess=false; } @@ -711,7 +724,7 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) { if (ValidCRC) { - if (Command!='P' && Command!='I') + if (Command!='P' && Command!='I' && !Cmd->DisableNames) mprintf(L"%s%s ",Cmd->DisablePercentage ? L" ":L"\b\b\b\b\b ", Arc.FileHead.FileHash.Type==HASH_NONE ? L" ?":St(MOk)); } @@ -734,7 +747,13 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) } } else - mprintf(L"\b\b\b\b\b "); + { + // We check SkipSolid to remove percent for skipped solid files only. + // We must not apply these \b to links with ShowChecksum==false + // and their possible error messages. + if (SkipSolid) + mprintf(L"\b\b\b\b\b "); + } // If we successfully unpacked a hard link, we wish to set its file // attributes. Hard link shares file metadata with link target, @@ -781,7 +800,13 @@ bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) Arc.FileHead.FileAttr&=~FILE_ATTRIBUTE_ARCHIVE; #endif if (!Cmd->IgnoreGeneralAttr && !SetFileAttr(DestFileName,Arc.FileHead.FileAttr)) + { uiMsg(UIERROR_FILEATTR,Arc.FileName,DestFileName); + // Android cannot set file attributes and while UIERROR_FILEATTR + // above is handled by Android RAR silently, this call would cause + // "Operation not permitted" message for every unpacked file. + ErrHandler.SysErrMsg(); + } PrevProcessed=true; } @@ -874,11 +899,21 @@ void CmdExtract::ExtrPrepareName(Archive &Arc,const wchar *ArcFileName,wchar *De #ifndef SFX_MODULE if (Cmd->AppendArcNameToPath!=APPENDARCNAME_NONE) { - if (Cmd->AppendArcNameToPath==APPENDARCNAME_DESTPATH) - wcsncatz(DestName,PointToName(Arc.FirstVolumeName),DestSize); - else - wcsncpyz(DestName,Arc.FirstVolumeName,DestSize); // To archive own dir. - SetExt(DestName,NULL,DestSize); + switch(Cmd->AppendArcNameToPath) + { + case APPENDARCNAME_DESTPATH: // To subdir of destination path. + wcsncatz(DestName,PointToName(Arc.FirstVolumeName),DestSize); + SetExt(DestName,NULL,DestSize); + break; + case APPENDARCNAME_OWNSUBDIR: // To subdir of archive own dir. + wcsncpyz(DestName,Arc.FirstVolumeName,DestSize); + SetExt(DestName,NULL,DestSize); + break; + case APPENDARCNAME_OWNDIR: // To archive own dir. + wcsncpyz(DestName,Arc.FirstVolumeName,DestSize); + RemoveNameFromPath(DestName); + break; + } AddEndSlash(DestName,DestSize); } #endif @@ -1048,8 +1083,11 @@ void CmdExtract::ExtrCreateDir(Archive &Arc,const wchar *ArcFileName) { if (Cmd->Test) { - mprintf(St(MExtrTestFile),ArcFileName); - mprintf(L" %s",St(MOk)); + if (!Cmd->DisableNames) + { + mprintf(St(MExtrTestFile),ArcFileName); + mprintf(L" %s",St(MOk)); + } return; } @@ -1068,26 +1106,33 @@ void CmdExtract::ExtrCreateDir(Archive &Arc,const wchar *ArcFileName) } if (!DirExist) { - CreatePath(DestFileName,true); + CreatePath(DestFileName,true,Cmd->DisableNames); MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); - if (MDCode!=MKDIR_SUCCESS) + if (MDCode!=MKDIR_SUCCESS && !IsNameUsable(DestFileName)) { + uiMsg(UIMSG_CORRECTINGNAME,Arc.FileName); wchar OrigName[ASIZE(DestFileName)]; wcsncpyz(OrigName,DestFileName,ASIZE(OrigName)); MakeNameUsable(DestFileName,true); - CreatePath(DestFileName,true); - MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); #ifndef SFX_MODULE - if (MDCode==MKDIR_SUCCESS) - uiMsg(UIERROR_RENAMING,Arc.FileName,OrigName,DestFileName); + uiMsg(UIERROR_RENAMING,Arc.FileName,OrigName,DestFileName); #endif + DirExist=FileExist(DestFileName) && IsDir(GetFileAttr(DestFileName)); + if (!DirExist) + { + CreatePath(DestFileName,true,Cmd->DisableNames); + MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); + } } } } if (MDCode==MKDIR_SUCCESS) { - mprintf(St(MCreatDir),DestFileName); - mprintf(L" %s",St(MOk)); + if (!Cmd->DisableNames) + { + mprintf(St(MCreatDir),DestFileName); + mprintf(L" %s",St(MOk)); + } PrevProcessed=true; } else @@ -1141,6 +1186,9 @@ bool CmdExtract::ExtrCreateFile(Archive &Arc,File &CurFile) if (!UserReject) { ErrHandler.CreateErrorMsg(Arc.FileName,DestFileName); + if (FileExist(DestFileName) && IsDir(GetFileAttr(DestFileName))) + uiMsg(UIERROR_DIRNAMEEXISTS); + #ifdef RARDLL Cmd->DllError=ERAR_ECREATE; #endif @@ -1153,7 +1201,7 @@ bool CmdExtract::ExtrCreateFile(Archive &Arc,File &CurFile) MakeNameUsable(DestFileName,true); - CreatePath(DestFileName,true); + CreatePath(DestFileName,true,Cmd->DisableNames); if (FileCreate(Cmd,&CurFile,DestFileName,ASIZE(DestFileName),&UserReject,Arc.FileHead.UnpSize,&Arc.FileHead.mtime,true)) { #ifndef SFX_MODULE @@ -1196,3 +1244,104 @@ bool CmdExtract::CheckUnpVer(Archive &Arc,const wchar *ArcFileName) } return !WrongVer; } + + +#ifndef SFX_MODULE +// To speed up solid volumes extraction, try to find a non-first start volume, +// which still allows to unpack all files. It is possible for independent +// solid volumes with solid statistics reset in the beginning. +bool CmdExtract::DetectStartVolume(const wchar *VolName,bool NewNumbering) +{ + wchar *ArgName=Cmd->FileArgs.GetString(); + Cmd->FileArgs.Rewind(); + if (ArgName!=NULL && (wcscmp(ArgName,L"*")==0 || wcscmp(ArgName,L"*.*")==0)) + return false; // No need to check further for * and *.* masks. + + wchar StartName[NM]; + *StartName=0; + + // Start search from first volume if all volumes preceding current are available. + wchar NextName[NM]; + GetFirstVolIfFullSet(VolName,NewNumbering,NextName,ASIZE(NextName)); + + bool Matched=false; + while (!Matched) + { + Archive Arc(Cmd); + if (!Arc.Open(NextName) || !Arc.IsArchive(false) || !Arc.Volume) + break; + + bool OpenNext=false; + while (Arc.ReadHeader()>0) + { + Wait(); + + HEADER_TYPE HeaderType=Arc.GetHeaderType(); + if (HeaderType==HEAD_ENDARC) + { + OpenNext|=Arc.EndArcHead.NextVolume; // Allow open next volume. + break; + } + if (HeaderType==HEAD_FILE) + { + if (!Arc.FileHead.SplitBefore) + { + if (!Arc.FileHead.Solid) // Can start extraction from here. + wcsncpyz(StartName,NextName,ASIZE(StartName)); + + if (Cmd->IsProcessFile(Arc.FileHead,NULL,MATCH_WILDSUBPATH,0,NULL,0)!=0) + { + Matched=true; // First matched file found, must stop further scan. + break; + } + } + if (Arc.FileHead.SplitAfter) + { + OpenNext=true; // Allow open next volume. + break; + } + } + Arc.SeekToNext(); + } + Arc.Close(); + + if (!OpenNext) + break; + + NextVolumeName(NextName,ASIZE(NextName),!Arc.NewNumbering); + } + bool NewStartFound=wcscmp(VolName,StartName)!=0; + if (NewStartFound) // Found a new volume to start extraction. + wcsncpyz(ArcName,StartName,ASIZE(ArcName)); + + return NewStartFound; +} +#endif + + +#ifndef SFX_MODULE +// Return the first volume name if all volumes preceding the specified +// are available. Otherwise return the specified volume name. +void CmdExtract::GetFirstVolIfFullSet(const wchar *SrcName,bool NewNumbering,wchar *DestName,size_t DestSize) +{ + wchar FirstVolName[NM]; + VolNameToFirstName(SrcName,FirstVolName,ASIZE(FirstVolName),NewNumbering); + wchar NextName[NM]; + wcsncpyz(NextName,FirstVolName,ASIZE(NextName)); + wchar ResultName[NM]; + wcsncpyz(ResultName,SrcName,ASIZE(ResultName)); + while (true) + { + if (wcscmp(SrcName,NextName)==0) + { + wcsncpyz(ResultName,FirstVolName,DestSize); + break; + } + if (!FileExist(NextName)) + break; + NextVolumeName(NextName,ASIZE(NextName),!NewNumbering); + } + wcsncpyz(DestName,ResultName,DestSize); +} + +#endif \ No newline at end of file diff --git a/deps/unrar/extract.hpp b/deps/unrar/extract.hpp index 325928dc0..159759b56 100644 --- a/deps/unrar/extract.hpp +++ b/deps/unrar/extract.hpp @@ -20,8 +20,12 @@ class CmdExtract void ExtrCreateDir(Archive &Arc,const wchar *ArcFileName); bool ExtrCreateFile(Archive &Arc,File &CurFile); bool CheckUnpVer(Archive &Arc,const wchar *ArcFileName); +#ifndef SFX_MODULE + bool DetectStartVolume(const wchar *VolName,bool NewNumbering); + void GetFirstVolIfFullSet(const wchar *SrcName,bool NewNumbering,wchar *DestName,size_t DestSize); +#endif - RarTime StartTime; // time when extraction started + RarTime StartTime; // Time when extraction started. CommandData *Cmd; @@ -34,6 +38,7 @@ class CmdExtract bool FirstFile; bool AllMatchesExact; bool ReconstructDone; + bool UseExactVolName; // If any non-zero solid file was successfully unpacked before current. // If true and if current encrypted file is broken, obviously diff --git a/deps/unrar/filcreat.cpp b/deps/unrar/filcreat.cpp index a64a7d4d7..620bee813 100644 --- a/deps/unrar/filcreat.cpp +++ b/deps/unrar/filcreat.cpp @@ -49,7 +49,7 @@ bool FileCreate(RAROptions *Cmd,File *NewFile,wchar *Name,size_t MaxNameSize, if (NewFile!=NULL && NewFile->Create(Name,FileMode)) return true; - CreatePath(Name,true); + CreatePath(Name,true,Cmd->DisableNames); return NewFile!=NULL ? NewFile->Create(Name,FileMode):DelFile(Name); } diff --git a/deps/unrar/file.cpp b/deps/unrar/file.cpp index e506fde7f..5a8099ec5 100644 --- a/deps/unrar/file.cpp +++ b/deps/unrar/file.cpp @@ -8,7 +8,6 @@ File::File() LastWrite=false; HandleType=FILE_HANDLENORMAL; SkipClose=false; - IgnoreReadErrors=false; ErrorType=FILE_SUCCESS; OpenShared=false; AllowDelete=true; @@ -18,6 +17,8 @@ File::File() NoSequentialRead=false; CreateMode=FMF_UNDEFINED; #endif + ReadErrorMode=FREM_ASK; + TruncatedAfterReadError=false; } @@ -37,6 +38,7 @@ void File::operator = (File &SrcFile) NewFile=SrcFile.NewFile; LastWrite=SrcFile.LastWrite; HandleType=SrcFile.HandleType; + TruncatedAfterReadError=SrcFile.TruncatedAfterReadError; wcsncpyz(FileName,SrcFile.FileName,ASIZE(FileName)); SrcFile.SkipClose=true; } @@ -118,12 +120,12 @@ bool File::Open(const wchar *Name,uint Mode) #ifdef _OSF_SOURCE extern "C" int flock(int, int); #endif - if (!OpenShared && UpdateMode && handle>=0 && flock(handle,LOCK_EX|LOCK_NB)==-1) { close(handle); return false; } + #endif if (handle==-1) hNewFile=FILE_BAD_HANDLE; @@ -146,6 +148,7 @@ bool File::Open(const wchar *Name,uint Mode) { hFile=hNewFile; wcsncpyz(FileName,Name,ASIZE(FileName)); + TruncatedAfterReadError=false; } return Success; } @@ -369,9 +372,12 @@ bool File::Write(const void *Data,size_t Size) int File::Read(void *Data,size_t Size) { + if (TruncatedAfterReadError) + return 0; + int64 FilePos=0; // Initialized only to suppress some compilers warning. - if (IgnoreReadErrors) + if (ReadErrorMode==FREM_IGNORE) FilePos=Tell(); int ReadSize; while (true) @@ -381,7 +387,7 @@ int File::Read(void *Data,size_t Size) { ErrorType=FILE_READERROR; if (AllowExceptions) - if (IgnoreReadErrors) + if (ReadErrorMode==FREM_IGNORE) { ReadSize=0; for (size_t I=0;IDisableNames); #ifdef _WIN_ALL bool Success=CreateHardLink(NameNew,NameExisting,NULL)!=0; diff --git a/deps/unrar/list.cpp b/deps/unrar/list.cpp index 77c10413d..476fd3c65 100644 --- a/deps/unrar/list.cpp +++ b/deps/unrar/list.cpp @@ -28,7 +28,7 @@ void ListArchive(CommandData *Cmd) if (!Arc.WOpen(ArcName)) continue; bool FileMatched=true; - while (1) + while (true) { int64 TotalPackSize=0,TotalUnpSize=0; uint FileCount=0; @@ -69,7 +69,7 @@ void ListArchive(CommandData *Cmd) wchar VolNumText[50]; *VolNumText=0; - while(Arc.ReadHeader()>0) + while (Arc.ReadHeader()>0) { Wait(); // Allow quit listing with Ctrl+C. HEADER_TYPE HeaderType=Arc.GetHeaderType(); diff --git a/deps/unrar/loclang.hpp b/deps/unrar/loclang.hpp index a82aca5cd..a1cd544a3 100644 --- a/deps/unrar/loclang.hpp +++ b/deps/unrar/loclang.hpp @@ -4,6 +4,7 @@ #define MYesNoAllRenQ L"_Yes_No_All_nEver_Rename_Quit" #define MContinueQuit L"_Continue_Quit" #define MRetryAbort L"_Retry_Abort" +#define MIgnoreAllRetryQuit L"_Ignore_iGnore all_Retry_Quit" #define MCopyright L"\nRAR %s Copyright (c) 1993-%d Alexander Roshal %d %s %d" #define MRegTo L"\nRegistered to %s\n" #define MShare L"\nTrial version Type 'rar -?' for help\n" @@ -54,7 +55,7 @@ #define MCHelpSwm L"\n - Stop switches scanning" #define MCHelpSwAT L"\n @[+] Disable [enable] file lists" #define MCHelpSwAC L"\n ac Clear Archive attribute after compression or extraction" -#define MCHelpSwAD L"\n ad Append archive name to destination path" +#define MCHelpSwAD L"\n ad[1,2] Alternate destination path" #define MCHelpSwAG L"\n ag[format] Generate archive name using the current date" #define MCHelpSwAI L"\n ai Ignore file attributes" #define MCHelpSwAO L"\n ao Add files with Archive attribute set" @@ -79,7 +80,7 @@ #define MCHelpSwF L"\n f Freshen files" #define MCHelpSwHP L"\n hp[password] Encrypt both file data and headers" #define MCHelpSwHT L"\n ht[b|c] Select hash type [BLAKE2,CRC32] for file checksum" -#define MCHelpSwIDP L"\n id[c,d,p,q] Disable messages" +#define MCHelpSwIDP L"\n id[c,d,n,p,q] Display or disable messages" #define MCHelpSwIEML L"\n ieml[addr] Send archive by email" #define MCHelpSwIERR L"\n ierr Send all messages to stderr" #define MCHelpSwILOG L"\n ilog[name] Log errors to file" @@ -320,7 +321,7 @@ #define MStreamUnknown L"\nWARNING: Unknown format of %s stream data\n" #define MInvalidName L"\nERROR: Invalid file name %s" #define MProcessArc L"\n\nProcessing archive %s" -#define MCorrectingName L"\nWARNING: Attempting to correct the invalid file name" +#define MCorrectingName L"\nWARNING: Attempting to correct the invalid file or directory name" #define MUnpCannotMerge L"\nWARNING: You need to start extraction from a previous volume to unpack %s" #define MUnknownOption L"\nERROR: Unknown option: %s" #define MSubHeadCorrupt L"\nERROR: Corrupt data header found, ignored" @@ -351,7 +352,7 @@ #define MRecVolLimit L"\nTotal number of usual and recovery volumes must not exceed %d" #define MVolumeNumber L"volume %d" #define MCannotDelete L"\nCannot delete %s" -#define MRecycleFailed L"\nCannot move some files and folders to Recycle Bin" +#define MRecycleFailed L"\nCannot move some files and directories to Recycle Bin" #define MCalcCRC L"\nCalculating the checksum" #define MTooLargeSFXArc L"\nToo large SFX archive. Windows cannot run the executable file exceeding 4 GB." #define MCalcCRCAllVol L"\nCalculating checksums of all volumes." @@ -380,3 +381,7 @@ #define MDictOutMem L"\nNot enough memory for %d MB compression dictionary, changed to %d MB." #define MUseSmalllerDict L"\nPlease use a smaller compression dictionary." #define MOpenErrAtime L"\nYou may need to remove -tsp switch to open this file." +#define MErrReadInfo L"\nChoose 'Ignore' to continue with the already read file part only, 'Ignore all' to do it for all read errors, 'Retry' to repeat read and 'Quit' to abort." +#define MErrReadTrunc L"\n%s is archived incompletely because of read error.\n" +#define MErrReadCount L"\n%u files are archived incompletely because of read errors." +#define MDirNameExists L"\nDirectory with such name already exists" diff --git a/deps/unrar/options.hpp b/deps/unrar/options.hpp index fd33d3d15..993b21963 100644 --- a/deps/unrar/options.hpp +++ b/deps/unrar/options.hpp @@ -61,7 +61,8 @@ enum SAVECOPY_MODE { enum APPENDARCNAME_MODE { - APPENDARCNAME_NONE=0,APPENDARCNAME_DESTPATH,APPENDARCNAME_OWNDIR + APPENDARCNAME_NONE=0,APPENDARCNAME_DESTPATH,APPENDARCNAME_OWNSUBDIR, + APPENDARCNAME_OWNDIR }; enum POWER_MODE { @@ -132,6 +133,7 @@ class RAROptions bool DisablePercentage; bool DisableCopyright; bool DisableDone; + bool DisableNames; bool PrintVersion; int Solid; int SolidCount; @@ -146,7 +148,7 @@ class RAROptions Array NextVolSizes; uint CurVolNum; bool AllYes; - bool MoreInfo; // -im, show more information, used only in "WinRAR t" now. + bool VerboseOutput; // -iv, display verbose output, used only in "WinRAR t" now. bool DisableSortSolid; int ArcTime; int ConvertNames; diff --git a/deps/unrar/pathfn.cpp b/deps/unrar/pathfn.cpp index 278863c74..41594bf95 100644 --- a/deps/unrar/pathfn.cpp +++ b/deps/unrar/pathfn.cpp @@ -655,7 +655,7 @@ wchar* VolNameToFirstName(const wchar *VolName,wchar *FirstName,size_t MaxSize,b } if (!FileExist(FirstName)) { - // If the first volume, which name we just generated, is not exist, + // If the first volume, which name we just generated, does not exist, // check if volume with same name and any other extension is available. // It can help in case of *.exe or *.sfx first volume. wchar Mask[NM]; diff --git a/deps/unrar/rijndael.cpp b/deps/unrar/rijndael.cpp index 89f287f0c..cbb1722fd 100644 --- a/deps/unrar/rijndael.cpp +++ b/deps/unrar/rijndael.cpp @@ -64,7 +64,7 @@ inline void Copy128(byte *dest,const byte *src) Rijndael::Rijndael() { - if (S[0]==0) + //if (S[0]==0) GenerateTables(); CBCMode = true; // Always true for RAR. } diff --git a/deps/unrar/secpassword.cpp b/deps/unrar/secpassword.cpp index 4865b3fd0..b99e53af0 100644 --- a/deps/unrar/secpassword.cpp +++ b/deps/unrar/secpassword.cpp @@ -51,7 +51,7 @@ class CryptLoader }; // We need to call FreeLibrary when RAR is exiting. -CryptLoader GlobalCryptLoader; +static CryptLoader GlobalCryptLoader; #endif SecPassword::SecPassword() diff --git a/deps/unrar/ui.hpp b/deps/unrar/ui.hpp index 6bd224dac..2654387c2 100644 --- a/deps/unrar/ui.hpp +++ b/deps/unrar/ui.hpp @@ -38,7 +38,8 @@ enum UIMESSAGE_CODE { UIERROR_PATHTOOLONG, UIERROR_DIRSCAN, UIERROR_UOWNERGET, UIERROR_UOWNERBROKEN, UIERROR_UOWNERGETOWNERID, UIERROR_UOWNERGETGROUPID, UIERROR_UOWNERSET, UIERROR_ULINKREAD, UIERROR_ULINKEXIST, - UIERROR_OPENPRESERVEATIME, + UIERROR_OPENPRESERVEATIME, UIERROR_READERRTRUNCATED, UIERROR_READERRCOUNT, + UIERROR_DIRNAMEEXISTS, UIMSG_FIRST, UIMSG_STRING, UIMSG_BUILD, UIMSG_RRSEARCH, UIMSG_ANALYZEFILEDATA, @@ -92,16 +93,18 @@ bool uiIsGlobalPasswordSet(); enum UIALARM_TYPE {UIALARM_ERROR, UIALARM_INFO, UIALARM_QUESTION}; void uiAlarm(UIALARM_TYPE Type); +void uiEolAfterMsg(); bool uiAskNextVolume(wchar *VolName,size_t MaxSize); -bool uiAskRepeatRead(const wchar *FileName); +#if !defined(SILENT) && !defined(SFX_MODULE) +void uiAskRepeatRead(const wchar *FileName,bool &Ignore,bool &All,bool &Retry,bool &Quit); +#endif bool uiAskRepeatWrite(const wchar *FileName,bool DiskFull); #ifndef SFX_MODULE const wchar *uiGetMonthName(int Month); #endif - class uiMsgStore { private: diff --git a/deps/unrar/uiconsole.cpp b/deps/unrar/uiconsole.cpp index 7b4998dda..ceae1a7c5 100644 --- a/deps/unrar/uiconsole.cpp +++ b/deps/unrar/uiconsole.cpp @@ -1,3 +1,5 @@ +static bool AnyMessageDisplayed=false; // For console -idn switch. + // Purely user interface function. Gets and returns user input. UIASKREP_RESULT uiAskReplace(wchar *Name,size_t MaxNameSize,int64 FileSize,RarTime *FileTime,uint Flags) { @@ -83,6 +85,19 @@ void uiProcessProgress(const char *Command,int64 CurSize,int64 TotalSize) void uiMsgStore::Msg() { + // When creating volumes, AnyMessageDisplayed must be reset for UIEVENT_NEWARCHIVE, + // so it ignores this and all earlier messages like UIEVENT_PROTECTEND + // and UIEVENT_PROTECTEND, because they precede "Creating archive" message + // and do not interfere with -idn and file names. If we do not ignore them, + // uiEolAfterMsg() in uiStartFileAddit() can cause unneeded carriage return + // in archiving percent after creating a new volume with -v -idn (and -rr + // for UIEVENT_PROTECT*) switches. AnyMessageDisplayed is set for messages + // after UIEVENT_NEWARCHIVE, so archiving percent with -idn is moved to + // next line and does not delete their last characters. + // Similarly we ignore UIEVENT_RRTESTINGEND for volumes, because it is issued + // before "Testing archive" and would add an excessive \n otherwise. + AnyMessageDisplayed=(Code!=UIEVENT_NEWARCHIVE && Code!=UIEVENT_RRTESTINGEND); + switch(Code) { case UIERROR_SYSERRMSG: @@ -121,6 +136,7 @@ void uiMsgStore::Msg() Log(NULL,St(MErrSeek),Str[0]); break; case UIERROR_FILEREAD: + mprintf(L"\n"); Log(Str[0],St(MErrRead),Str[1]); break; case UIERROR_FILEWRITE: @@ -304,7 +320,15 @@ void uiMsgStore::Msg() case UIERROR_ULINKEXIST: Log(NULL,St(MSymLinkExists),Str[0]); break; - + case UIERROR_READERRTRUNCATED: + Log(NULL,St(MErrReadTrunc),Str[0]); + break; + case UIERROR_READERRCOUNT: + Log(NULL,St(MErrReadCount),Num[0]); + break; + case UIERROR_DIRNAMEEXISTS: + Log(NULL,St(MDirNameExists)); + break; #ifndef SFX_MODULE case UIMSG_STRING: @@ -397,11 +421,15 @@ bool uiAskNextVolume(wchar *VolName,size_t MaxSize) } -bool uiAskRepeatRead(const wchar *FileName) +void uiAskRepeatRead(const wchar *FileName,bool &Ignore,bool &All,bool &Retry,bool &Quit) { - mprintf(L"\n"); - Log(NULL,St(MErrRead),FileName); - return Ask(St(MRetryAbort))==1; + eprintf(St(MErrReadInfo)); + int Code=Ask(St(MIgnoreAllRetryQuit)); + + Ignore=(Code==1); + All=(Code==2); + Quit=(Code==4); + Retry=!Ignore && !All && !Quit; // Default also for invalid input, not just for 'Retry'. } @@ -423,3 +451,15 @@ const wchar *uiGetMonthName(int Month) return St(MonthID[Month]); } #endif + + +void uiEolAfterMsg() +{ + if (AnyMessageDisplayed) + { + // Avoid deleting several last characters of any previous error message + // with percentage indicator in -idn mode. + AnyMessageDisplayed=false; + mprintf(L"\n"); + } +} diff --git a/deps/unrar/uisilent.cpp b/deps/unrar/uisilent.cpp index 1b5de13df..1df097566 100644 --- a/deps/unrar/uisilent.cpp +++ b/deps/unrar/uisilent.cpp @@ -67,3 +67,8 @@ const wchar *uiGetMonthName(int Month) return L""; } #endif + + +void uiEolAfterMsg() +{ +} diff --git a/deps/unrar/ulinks.cpp b/deps/unrar/ulinks.cpp index 1656824f6..d198f2e06 100644 --- a/deps/unrar/ulinks.cpp +++ b/deps/unrar/ulinks.cpp @@ -1,9 +1,14 @@ -static bool UnixSymlink(const char *Target,const wchar *LinkName,RarTime *ftm,RarTime *fta) +static bool UnixSymlink(CommandData *Cmd,const char *Target,const wchar *LinkName,RarTime *ftm,RarTime *fta) { - CreatePath(LinkName,true); + CreatePath(LinkName,true,Cmd->DisableNames); + + // Overwrite prompt was already issued and confirmed earlier, so we can + // remove existing symlink or regular file here. PrepareToDelete was also + // called earlier inside of uiAskReplaceEx. DelFile(LinkName); + char LinkNameA[NM]; WideToChar(LinkName,LinkNameA,ASIZE(LinkNameA)); if (symlink(Target,LinkNameA)==-1) // Error. @@ -75,7 +80,7 @@ bool ExtractUnixLink30(CommandData *Cmd,ComprDataIO &DataIO,Archive &Arc,const w if (!Cmd->AbsoluteLinks && (*TargetW==0 || IsFullPath(TargetW) || !IsRelativeSymlinkSafe(Cmd,Arc.FileHead.FileName,LinkName,TargetW))) return false; - return UnixSymlink(Target,LinkName,&Arc.FileHead.mtime,&Arc.FileHead.atime); + return UnixSymlink(Cmd,Target,LinkName,&Arc.FileHead.mtime,&Arc.FileHead.atime); } return false; } @@ -101,5 +106,5 @@ bool ExtractUnixLink50(CommandData *Cmd,const wchar *Name,FileHeader *hd) if (!Cmd->AbsoluteLinks && (IsFullPath(Target) || !IsRelativeSymlinkSafe(Cmd,hd->FileName,Name,hd->RedirName))) return false; - return UnixSymlink(Target,Name,&hd->mtime,&hd->atime); + return UnixSymlink(Cmd,Target,Name,&hd->mtime,&hd->atime); } diff --git a/deps/unrar/unicode.cpp b/deps/unrar/unicode.cpp index df63608d7..641f6c892 100644 --- a/deps/unrar/unicode.cpp +++ b/deps/unrar/unicode.cpp @@ -471,6 +471,7 @@ int wcsnicomp(const wchar *s1,const wchar *s2,size_t n) } +// Case insensitive wcsstr(). const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search) { for (size_t i=0;str[i]!=0;i++) diff --git a/deps/unrar/version.hpp b/deps/unrar/version.hpp index 24f0f8b5e..922c67ed8 100644 --- a/deps/unrar/version.hpp +++ b/deps/unrar/version.hpp @@ -1,6 +1,6 @@ -#define RARVER_MAJOR 5 -#define RARVER_MINOR 91 +#define RARVER_MAJOR 6 +#define RARVER_MINOR 1 #define RARVER_BETA 0 -#define RARVER_DAY 25 -#define RARVER_MONTH 6 -#define RARVER_YEAR 2020 +#define RARVER_DAY 7 +#define RARVER_MONTH 4 +#define RARVER_YEAR 2021 diff --git a/deps/unrar/volume.cpp b/deps/unrar/volume.cpp index 6cfa772e6..001a9673a 100644 --- a/deps/unrar/volume.cpp +++ b/deps/unrar/volume.cpp @@ -136,7 +136,7 @@ bool MergeArchive(Archive &Arc,ComprDataIO *DataIO,bool ShowFileName,wchar Comma Arc.ConvertAttributes(); Arc.Seek(Arc.NextBlockPos-Arc.FileHead.PackSize,SEEK_SET); } - if (ShowFileName) + if (ShowFileName && !Cmd->DisableNames) { mprintf(St(MExtrPoints),Arc.FileHead.FileName); if (!Cmd->DisablePercentage) diff --git a/deps/unrar/win32lnk.cpp b/deps/unrar/win32lnk.cpp index a68ed75a2..84ab63ff6 100644 --- a/deps/unrar/win32lnk.cpp +++ b/deps/unrar/win32lnk.cpp @@ -71,20 +71,36 @@ bool CreateReparsePoint(CommandData *Cmd,const wchar *Name,FileHeader *hd) !IsRelativeSymlinkSafe(Cmd,hd->FileName,Name,hd->RedirName))) return false; - CreatePath(Name,true); + CreatePath(Name,true,Cmd->DisableNames); + + // Overwrite prompt was already issued and confirmed earlier, so we can + // remove existing symlink or regular file here. PrepareToDelete was also + // called earlier inside of uiAskReplaceEx. + if (FileExist(Name)) + if (IsDir(GetFileAttr(Name))) + DelDir(Name); + else + DelFile(Name); // 'DirTarget' check is important for Unix symlinks to directories. // Unix symlinks do not have their own 'directory' attribute. if (hd->Dir || hd->DirTarget) { if (!CreateDirectory(Name,NULL)) + { + uiMsg(UIERROR_DIRCREATE,UINULL,Name); + ErrHandler.SetErrorCode(RARX_CREATE); return false; + } } else { HANDLE hFile=CreateFile(Name,GENERIC_WRITE,0,NULL,CREATE_NEW,FILE_ATTRIBUTE_NORMAL,NULL); if (hFile == INVALID_HANDLE_VALUE) + { + ErrHandler.CreateErrorMsg(Name); return false; + } CloseHandle(hFile); } @@ -138,7 +154,11 @@ bool CreateReparsePoint(CommandData *Cmd,const wchar *Name,FileHeader *hd) OPEN_EXISTING,FILE_FLAG_OPEN_REPARSE_POINT| FILE_FLAG_BACKUP_SEMANTICS,NULL); if (hFile==INVALID_HANDLE_VALUE) + { + ErrHandler.CreateErrorMsg(Name); + ErrHandler.SetErrorCode(RARX_CREATE); return false; + } DWORD Returned; if (!DeviceIoControl(hFile,FSCTL_SET_REPARSE_POINT,rdb, diff --git a/deps/xxHash/.gitignore b/deps/xxHash/.gitignore index 36639c6e8..d0ce9aac3 100644 --- a/deps/xxHash/.gitignore +++ b/deps/xxHash/.gitignore @@ -1,17 +1,47 @@ # objects *.o +*.obj +*.s # libraries libxxhash.* +!libxxhash.pc.in # Executables +*.exe xxh32sum xxh64sum +xxh128sum xxhsum xxhsum32 xxhsum_privateXXH xxhsum_inlinedXXH +dispatch +tests/generate_unicode_test + +# compilation chain +.clang_complete # Mac OS-X artefacts *.dSYM .DS_Store + +# Wasm / emcc / emscripten artefacts +*.html +*.wasm +*.js + +# CMake build directories +build*/ + +# project managers artifacts +.projectile + +# analyzer artifacts +infer-out + +# test artifacts +.test* +tmp* +tests/*.unicode +tests/unicode_test* diff --git a/deps/xxHash/.travis.yml b/deps/xxHash/.travis.yml index 895da855a..2f3a21680 100644 --- a/deps/xxHash/.travis.yml +++ b/deps/xxHash/.travis.yml @@ -1,9 +1,132 @@ language: c -compiler: gcc -script: make -B test-all + +# Dump CPU info before start before_install: - - sudo apt-get update -qq - - sudo apt-get install -qq gcc-arm-linux-gnueabi - - sudo apt-get install -qq clang - - sudo apt-get install -qq g++-multilib - - sudo apt-get install -qq gcc-multilib + - cat /proc/cpuinfo + +matrix: + fast_finish: true + include: + + - name: General linux tests (Xenial) + dist: xenial + arch: amd64 + addons: + apt: + packages: + - clang + - g++-multilib + - gcc-multilib + - cppcheck + script: + - make -B test-all + - make clean + - make dispatch + - make clean + - CC=g++ CFLAGS="-O1 -mavx512f" make + - make clean + - CC=g++ CFLAGS="-Wall -Wextra" make DISPATCH=1 + + + - name: Check results consistency on x64 + arch: amd64 + script: + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_SSE2 make check # SSE2 code path + - make clean + - CPPFLAGS="-mavx2 -DXXH_VECTOR=XXH_AVX2" make check # AVX2 code path + - make clean + - CPPFLAGS="-mavx512f -DXXH_VECTOR=XXH_AVX512" make check # AVX512 code path + - make clean + - CPPFLAGS=-DXXH_REROLL=1 make check # reroll code path (#240) + - make -C tests/bench + + - name: ARM compilation and consistency checks (Qemu) + dist: xenial + arch: amd64 + addons: + apt: + packages: + - qemu-system-arm + - qemu-user-static + - gcc-arm-linux-gnueabi + - libc6-dev-armel-cross + script: + # arm (32-bit) + - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static RUN_ENV=qemu-arm-static make check # Scalar code path + - make clean + # NEON (32-bit) + - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=XXH_NEON CFLAGS="-O3 -march=armv7-a -fPIC -mfloat-abi=softfp -mfpu=neon-vfpv4" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path + + - name: aarch64 compilation and consistency checks + dist: xenial + arch: arm64 + script: + # aarch64 + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + # NEON (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_NEON make check # NEON code path + # clang + - make clean + - CC=clang CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + # clang + NEON + - make clean + - CC=clang CPPFLAGS=-DXXH_VECTOR=XXH_NEON make check # NEON code path + + # We need Bionic here because the QEMU versions shipped in the older repos + # do not support POWER8 emulation, and compiling QEMU from source is a pain. + - name: PowerPC + PPC64 compilation and consistency checks (Qemu on Bionic) + dist: bionic + arch: amd64 + addons: + apt: + packages: + - qemu-system-ppc + - qemu-user-static + - gcc-powerpc-linux-gnu + - gcc-powerpc64-linux-gnu + - libc6-dev-powerpc-cross + - libc6-dev-ppc64-cross + script: + - CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc-static LDFLAGS=-static make check # Scalar code path + - make clean + - CC=powerpc64-linux-gnu-gcc RUN_ENV=qemu-ppc64-static CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR CFLAGS="-O3" LDFLAGS="-static -m64" make check # Scalar code path + # VSX code + - make clean + - CC=powerpc64-linux-gnu-gcc RUN_ENV="qemu-ppc64-static -cpu power8" CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -maltivec -mvsx -mcpu=power8 -mpower8-vector" LDFLAGS="-static -m64" make check # VSX code path + # altivec.h redefinition issue #426 + - make clean + - CC=powerpc64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-maltivec -mvsx -mcpu=power8 -mpower8-vector" make -C tests test_ppc_redefine + + - name: PPC64LE compilation and consistency checks + dist: xenial + arch: ppc64le + script: + # Scalar (universal) code path + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static make check + # VSX code path (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -maltivec -mvsx -mpower8-vector -mcpu=power8" LDFLAGS="-static" make check + # altivec.h redefinition issue #426 + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-maltivec -mvsx -mcpu=power8 -mpower8-vector" make -C tests test_ppc_redefine + + - name: IBM s390x compilation and consistency checks + dist: bionic + arch: s390x + script: + # Scalar (universal) code path + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static make check + # s390x code path (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -march=arch11 -mzvector" LDFLAGS="-static" make check + + - name: cmake build test + script: + - cd cmake_unofficial + - mkdir build + - cd build + - cmake .. + - make diff --git a/deps/xxHash/CHANGELOG b/deps/xxHash/CHANGELOG new file mode 100644 index 000000000..23870756b --- /dev/null +++ b/deps/xxHash/CHANGELOG @@ -0,0 +1,52 @@ +v0.8.0 +- api : stabilize XXH3 +- cli : xxhsum can parse BSD-style --check lines, by @WayneD +- cli : `xxhsum -` accepts console input, requested by @jaki +- cli : xxhsum accepts -- separator, by @jaki +- cli : fix : print correct default algo for symlinked helpers, by @martinetd +- install: improved pkgconfig script, allowing custom install locations, requested by @ellert + +v0.7.4 +- perf: automatic vector detection and selection at runtime (`xxh_x86dispatch.h`), initiated by @easyaspi314 +- perf: added AVX512 support, by @gzm55 +- api : new: secret generator `XXH_generateSecret()`, suggested by @koraa +- api : fix: XXH3_state_t is movable, identified by @koraa +- api : fix: state is correctly aligned in AVX mode (unlike `malloc()`), by @easyaspi314 +- api : fix: streaming generated wrong values in some combination of random ingestion lengths, reported by @WayneD +- cli : fix unicode print on Windows, by @easyaspi314 +- cli : can `-c` check file generated by sfv +- build: `make DISPATCH=1` generates `xxhsum` and `libxxhash` with runtime vector detection (x86/x64 only) +- install: cygwin installation support +- doc : Cryptol specification of XXH32 and XXH64, by @weaversa + +v0.7.3 +- perf: improved speed for large inputs (~+20%) +- perf: improved latency for small inputs (~10%) +- perf: s390x Vectorial code, by @easyaspi314 +- cli: improved support for Unicode filenames on Windows, thanks to @easyaspi314 and @t-mat +- api: `xxhash.h` can now be included in any order, with and without `XXH_STATIC_LINKING_ONLY` and `XXH_INLINE_ALL` +- build: xxHash's implementation transferred into `xxhash.h`. No more need to have `xxhash.c` in the `/include` directory for `XXH_INLINE_ALL` to work +- install: created pkg-config file, by @bket +- install: VCpkg installation instructions, by @LilyWangL +- doc: Highly improved code documentation, by @easyaspi314 +- misc: New test tool in `/tests/collisions`: brute force collision tester for 64-bit hashes + +v0.7.2 +- Fixed collision ratio of `XXH128` for some specific input lengths, reported by @svpv +- Improved `VSX` and `NEON` variants, by @easyaspi314 +- Improved performance of scalar code path (`XXH_VECTOR=0`), by @easyaspi314 +- `xxhsum`: can generate 128-bit hashes with the `-H2` option (note: for experimental purposes only! `XXH128` is not yet frozen) +- `xxhsum`: option `-q` removes status notifications + +v0.7.1 +- Secret first: the algorithm computation can be altered by providing a "secret", which is any blob of bytes, of size >= `XXH3_SECRET_SIZE_MIN`. +- `seed` is still available, and acts as a secret generator +- updated `ARM NEON` variant by @easyaspi314 +- Streaming implementation is available +- Improve compatibility and performance with Visual Studio, with help from @aras-p +- Better integration when using `XXH_INLINE_ALL`: do not pollute host namespace, use its own macros, such as `XXH_ASSERT()`, `XXH_ALIGN`, etc. +- 128-bit variant provides helper functions for comparison of hashes. +- Better `clang` generation of `rotl` instruction, thanks to @easyaspi314 +- `XXH_REROLL` build macro to reduce binary size, by @easyaspi314 +- Improved `cmake` script, by @Mezozoysky +- Full benchmark program provided in `/tests/bench` diff --git a/deps/xxHash/LICENSE b/deps/xxHash/LICENSE index 7de801ed1..fa20595dc 100644 --- a/deps/xxHash/LICENSE +++ b/deps/xxHash/LICENSE @@ -1,7 +1,9 @@ xxHash Library -Copyright (c) 2012-2014, Yann Collet +Copyright (c) 2012-2020 Yann Collet All rights reserved. +BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -22,3 +24,25 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------- + +xxhsum command line interface +Copyright (c) 2013-2020 Yann Collet +All rights reserved. + +GPL v2 License + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. diff --git a/deps/xxHash/Makefile b/deps/xxHash/Makefile index 6dd738f2f..ef24e94c6 100644 --- a/deps/xxHash/Makefile +++ b/deps/xxHash/Makefile @@ -1,6 +1,6 @@ # ################################################################ # xxHash Makefile -# Copyright (C) Yann Collet 2012-2015 +# Copyright (C) 2012-2020 Yann Collet # # GPL v2 License # @@ -18,38 +18,35 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# You can contact the author at : -# - xxHash source repository : http://code.google.com/p/xxhash/ +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash # ################################################################ -# xxhsum : provides 32/64 bits hash of one or multiple files, or stdin +# xxhsum: provides 32/64 bits hash of one or multiple files, or stdin # ################################################################ +Q = $(if $(filter 1,$(V) $(VERBOSE)),,@) # Version numbers -LIBVER_MAJOR_SCRIPT:=`sed -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_MINOR_SCRIPT:=`sed -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_PATCH_SCRIPT:=`sed -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +SED ?= sed +SED_ERE_OPT ?= -E +LIBVER_MAJOR_SCRIPT:=`$(SED) -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MINOR_SCRIPT:=`$(SED) -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_PATCH_SCRIPT:=`$(SED) -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) -# SSE4 detection -HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0) -ifeq ($(HAVE_SSE4), 1) -NOSSE4 := -mno-sse4 -else -NOSSE4 := -endif - -CFLAGS ?= -O2 $(NOSSE4) # disables potential auto-vectorization -CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef - -FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS) -XXHSUM_VERSION=$(LIBVER) -MD2ROFF = ronn -MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)" +CFLAGS ?= -O3 +DEBUGFLAGS+=-Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls -Wstrict-overflow=2 +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CFLAGS) $(CPPFLAGS) +XXHSUM_VERSION = $(LIBVER) +UNAME := $(shell uname) # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) @@ -59,8 +56,8 @@ EXT = endif # OS X linker doesn't support -soname, and use different extension -# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -ifeq ($(shell uname), Darwin) +# see: https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html +ifeq ($(UNAME), Darwin) SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) @@ -75,191 +72,336 @@ endif LIBXXH = libxxhash.$(SHARED_EXT_VER) +## generate CLI and libraries in release mode (default for `make`) .PHONY: default +default: DEBUGFLAGS= default: lib xxhsum_and_links .PHONY: all all: lib xxhsum xxhsum_inlinedXXH -xxhsum32: CFLAGS += -m32 -xxhsum xxhsum32: xxhash.c xxhsum.c - $(CC) $(FLAGS) $^ -o $@$(EXT) +## xxhsum is the command line interface (CLI) +ifeq ($(DISPATCH),1) +xxhsum: CPPFLAGS += -DXXHSUM_DISPATCH=1 +xxhsum: xxh_x86dispatch.o +endif +xxhsum: xxhash.o xxhsum.o + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +xxhsum32: CFLAGS += -m32 ## generate CLI in 32-bits mode +xxhsum32: xxhash.c xxhsum.c ## do not generate object (avoid mixing different ABI) + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +## dispatch only works for x86/x64 systems +dispatch: CPPFLAGS += -DXXHSUM_DISPATCH=1 +dispatch: xxhash.o xxh_x86dispatch.o xxhsum.c + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +xxhash.o: xxhash.c xxhash.h +xxhsum.o: xxhsum.c xxhash.h xxh_x86dispatch.h +xxh_x86dispatch.o: xxh_x86dispatch.c xxh_x86dispatch.h xxhash.h .PHONY: xxhsum_and_links -xxhsum_and_links: xxhsum - ln -sf xxhsum xxh32sum - ln -sf xxhsum xxh64sum +xxhsum_and_links: xxhsum xxh32sum xxh64sum xxh128sum +xxh32sum xxh64sum xxh128sum: xxhsum + ln -sf $<$(EXT) $@$(EXT) + +xxhsum_inlinedXXH: CPPFLAGS += -DXXH_INLINE_ALL xxhsum_inlinedXXH: xxhsum.c - $(CC) $(FLAGS) -DXXH_PRIVATE_API $^ -o $@$(EXT) + $(CC) $(FLAGS) $^ -o $@$(EXT) # library libxxhash.a: ARFLAGS = rcs libxxhash.a: xxhash.o - @echo compiling static library - @$(AR) $(ARFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(LIBXXH): LDFLAGS += -shared ifeq (,$(filter Windows%,$(OS))) -$(LIBXXH): LDFLAGS += -fPIC +$(LIBXXH): CFLAGS += -fPIC +endif +ifeq ($(DISPATCH),1) +$(LIBXXH): xxh_x86dispatch.c endif $(LIBXXH): xxhash.c - @echo compiling dynamic library $(LIBVER) - @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ - @echo creating versioned links - @ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR) - @ln -sf $@ libxxhash.$(SHARED_EXT) + $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ + ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR) + ln -sf $@ libxxhash.$(SHARED_EXT) -libxxhash : $(LIBXXH) +.PHONY: libxxhash +libxxhash: ## generate dynamic xxhash library +libxxhash: $(LIBXXH) +.PHONY: lib +lib: ## generate static and dynamic xxhash libraries lib: libxxhash.a libxxhash +# helper targets +AWK = awk +GREP = grep +SORT = sort + +.PHONY: list +list: ## list all Makefile targets + $(Q)$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | $(SORT) | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + +.PHONY: help +help: ## list documented targets + $(Q)$(GREP) -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + $(SORT) | \ + $(AWK) 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: clean +clean: ## remove all build artifacts + $(Q)$(RM) -r *.dSYM # Mac OS-X specific + $(Q)$(RM) core *.o *.$(SHARED_EXT) *.$(SHARED_EXT).* *.a libxxhash.pc + $(Q)$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) dispatch$(EXT) + $(Q)$(RM) xxh32sum$(EXT) xxh64sum$(EXT) xxh128sum$(EXT) + @echo cleaning completed + + +# ================================================= # tests +# ================================================= +# make check can be run with cross-compiled binaries on emulated environments (qemu user mode) +# by setting $(RUN_ENV) to the target emulation environment .PHONY: check -check: xxhsum +check: xxhsum ## basic tests for xxhsum CLI, set RUN_ENV for emulated environments # stdin - ./xxhsum < xxhash.c + $(RUN_ENV) ./xxhsum$(EXT) < xxhash.c # multiple files - ./xxhsum xxhash.* xxhsum.* + $(RUN_ENV) ./xxhsum$(EXT) xxhash.* xxhsum.* # internal bench - ./xxhsum -bi1 + $(RUN_ENV) ./xxhsum$(EXT) -bi0 + # long bench command + $(RUN_ENV) ./xxhsum$(EXT) --benchmark-all -i0 + # bench multiple variants + $(RUN_ENV) ./xxhsum$(EXT) -b1,2,3 -i0 # file bench - ./xxhsum -bi1 xxhash.c + $(RUN_ENV) ./xxhsum$(EXT) -bi0 xxhash.c + # 32-bit + $(RUN_ENV) ./xxhsum$(EXT) -H0 xxhash.c + # 128-bit + $(RUN_ENV) ./xxhsum$(EXT) -H2 xxhash.c + # request incorrect variant + $(RUN_ENV) ./xxhsum$(EXT) -H9 xxhash.c ; test $$? -eq 1 + @printf "\n ....... checks completed successfully ....... \n" + +.PHONY: test-unicode +test-unicode: + $(MAKE) -C tests test_unicode .PHONY: test-mem -test-mem: xxhsum - # memory tests - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -bi1 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c +VALGRIND = valgrind --leak-check=yes --error-exitcode=1 +test-mem: RUN_ENV = $(VALGRIND) +test-mem: xxhsum check .PHONY: test32 test32: clean xxhsum32 @echo ---- test 32-bit ---- ./xxhsum32 -bi1 xxhash.c +.PHONY: test-xxhsum-c test-xxhsum-c: xxhsum # xxhsum to/from pipe - ./xxhsum lib* | ./xxhsum -c - - ./xxhsum -H0 lib* | ./xxhsum -c - + ./xxhsum xxh* | ./xxhsum -c - + ./xxhsum -H0 xxh* | ./xxhsum -c - + # xxhsum -q does not display "Loading" message into stderr (#251) + ! ./xxhsum -q xxh* 2>&1 | grep Loading + # xxhsum does not display "Loading" message into stderr either + ! ./xxhsum xxh* 2>&1 | grep Loading + # Check that xxhsum do display filename that it failed to open. + LC_ALL=C ./xxhsum nonexistent 2>&1 | grep "Error: Could not open 'nonexistent'" # xxhsum to/from file, shell redirection - ./xxhsum lib* > .test.xxh64 - ./xxhsum -H0 lib* > .test.xxh32 - ./xxhsum -c .test.xxh64 - ./xxhsum -c .test.xxh32 + ./xxhsum xxh* > .test.xxh64 + ./xxhsum --tag xxh* > .test.xxh64_tag + ./xxhsum --little-endian xxh* > .test.le_xxh64 + ./xxhsum --tag --little-endian xxh* > .test.le_xxh64_tag + ./xxhsum -H0 xxh* > .test.xxh32 + ./xxhsum -H0 --tag xxh* > .test.xxh32_tag + ./xxhsum -H0 --little-endian xxh* > .test.le_xxh32 + ./xxhsum -H0 --tag --little-endian xxh* > .test.le_xxh32_tag + ./xxhsum -H2 xxh* > .test.xxh128 + ./xxhsum -H2 --tag xxh* > .test.xxh128_tag + ./xxhsum -H2 --little-endian xxh* > .test.le_xxh128 + ./xxhsum -H2 --tag --little-endian xxh* > .test.le_xxh128_tag + ./xxhsum -c .test.xxh* + ./xxhsum -c --little-endian .test.le_xxh* + ./xxhsum -c .test.*_tag + # read list of files from stdin ./xxhsum -c < .test.xxh64 ./xxhsum -c < .test.xxh32 + cat .test.xxh* | ./xxhsum -c - + # check variant with '*' marker as second separator + $(SED) 's/ / \*/' .test.xxh32 | ./xxhsum -c + # bsd-style output + ./xxhsum --tag xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H0 xxhsum* | $(GREP) XXH32 + ./xxhsum --tag -H1 xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H2 xxhsum* | $(GREP) XXH128 + ./xxhsum --tag -H32 xxhsum* | $(GREP) XXH32 + ./xxhsum --tag -H64 xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H128 xxhsum* | $(GREP) XXH128 + ./xxhsum --tag -H0 --little-endian xxhsum* | $(GREP) XXH32_LE + ./xxhsum --tag -H1 --little-endian xxhsum* | $(GREP) XXH64_LE + ./xxhsum --tag -H2 --little-endian xxhsum* | $(GREP) XXH128_LE + ./xxhsum --tag -H32 --little-endian xxhsum* | $(GREP) XXH32_LE + ./xxhsum --tag -H64 --little-endian xxhsum* | $(GREP) XXH64_LE + ./xxhsum --tag -H128 --little-endian xxhsum* | $(GREP) XXH128_LE + # check bsd-style + ./xxhsum --tag xxhsum* | ./xxhsum -c + ./xxhsum --tag -H32 --little-endian xxhsum* | ./xxhsum -c # xxhsum -c warns improperly format lines. - cat .test.xxh64 .test.xxh32 | ./xxhsum -c - - cat .test.xxh32 .test.xxh64 | ./xxhsum -c - + echo '12345678 ' >>.test.xxh32 + ./xxhsum -c .test.xxh32 | $(GREP) improperly + echo '123456789 file' >>.test.xxh64 + ./xxhsum -c .test.xxh64 | $(GREP) improperly # Expects "FAILED" echo "0000000000000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 echo "00000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 # Expects "FAILED open or read" echo "0000000000000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 echo "00000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 - @$(RM) -f .test.xxh32 .test.xxh64 + @$(RM) .test.* +.PHONY: armtest armtest: clean @echo ---- test ARM compilation ---- - $(MAKE) xxhsum CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static" + CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static" $(MAKE) xxhsum +.PHONY: clangtest clangtest: clean @echo ---- test clang compilation ---- - $(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" + CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" $(MAKE) all -gpptest: clean - @echo ---- test g++ compilation ---- - $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" +.PHONY: cxxtest +cxxtest: clean + @echo ---- test C++ compilation ---- + CC="$(CXX) -Wno-deprecated" $(MAKE) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror -fPIC" -c90test: clean +.PHONY: c90test +ifeq ($(NO_C90_TEST),true) +c90test: + @echo no c90 compatibility test +else +c90test: CPPFLAGS += -DXXH_NO_LONG_LONG +c90test: CFLAGS += -std=c90 -Werror -pedantic +c90test: xxhash.c @echo ---- test strict C90 compilation [xxh32 only] ---- - $(CC) -std=c90 -Werror -pedantic -DXXH_NO_LONG_LONG -c xxhash.c $(RM) xxhash.o + $(CC) $(FLAGS) $^ $(LDFLAGS) -c + $(RM) xxhash.o +endif +.PHONY: usan usan: CC=clang -usan: clean +usan: CXX=clang++ +usan: ## check CLI runtime for undefined behavior, using clang's sanitizer @echo ---- check undefined behavior - sanitize ---- - $(MAKE) clean test CC=$(CC) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all" - -staticAnalyze: clean - @echo ---- static analyzer - scan-build ---- - CFLAGS="-g -Werror" scan-build --status-bugs -v $(MAKE) all - -namespaceTest: + $(MAKE) clean + $(MAKE) test CC=$(CC) CXX=$(CXX) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all" + +.PHONY: staticAnalyze +SCANBUILD ?= scan-build +staticAnalyze: clean ## check C source files using $(SCANBUILD) static analyzer + @echo ---- static analyzer - $(SCANBUILD) ---- + CFLAGS="-g -Werror" $(SCANBUILD) --status-bugs -v $(MAKE) all + +CPPCHECK ?= cppcheck +.PHONY: cppcheck +cppcheck: ## check C source files using $(CPPCHECK) static analyzer + @echo ---- static analyzer - $(CPPCHECK) ---- + $(CPPCHECK) . --force --enable=warning,portability,performance,style --error-exitcode=1 > /dev/null + +.PHONY: namespaceTest +namespaceTest: ## ensure XXH_NAMESPACE redefines all public symbols $(CC) -c xxhash.c $(CC) -DXXH_NAMESPACE=TEST_ -c xxhash.c -o xxhash2.o $(CC) xxhash.o xxhash2.o xxhsum.c -o xxhsum2 # will fail if one namespace missing (symbol collision) $(RM) *.o xxhsum2 # clean -xxhsum.1: xxhsum.1.md - cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ +MD2ROFF ?= ronn +MD2ROFF_FLAGS ?= --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)" +xxhsum.1: xxhsum.1.md xxhash.h + cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | $(SED) -n '/^\.\\\".*/!p' > $@ -man: xxhsum.1 +.PHONY: man +man: xxhsum.1 ## generate man page from markdown source +.PHONY: clean-man clean-man: $(RM) xxhsum.1 -preview-man: clean-man man +.PHONY: preview-man +preview-man: man man ./xxhsum.1 -test: all namespaceTest check test-xxhsum-c c90test +.PHONY: test +test: DEBUGFLAGS += -DXXH_DEBUGLEVEL=1 +test: all namespaceTest check test-xxhsum-c c90test test-tools + +.PHONY: test-inline +test-inline: + $(MAKE) -C tests test_multiInclude -test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace staticAnalyze +.PHONY: test-all +test-all: CFLAGS += -Werror +test-all: test test32 clangtest cxxtest usan test-inline listL120 trailingWhitespace test-unicode + +.PHONY: test-tools +test-tools: + CFLAGS=-Werror $(MAKE) -C tests/bench + CFLAGS=-Werror $(MAKE) -C tests/collisions .PHONY: listL120 -listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) +listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note: $$, for Makefile compatibility) find . -type f -name '*.c' -o -name '*.h' | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' $$filename; done .PHONY: trailingWhitespace trailingWhitespace: - ! grep -E "`printf '[ \\t]$$'`" *.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt + ! $(GREP) -E "`printf '[ \\t]$$'`" xxhsum.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt -.PHONY: clean -clean: - @$(RM) -r *.dSYM # Mac OS-X specific - @$(RM) core *.o libxxhash.* - @$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum - @echo cleaning completed - -#----------------------------------------------------------------------------- +# ========================================================= # make install is validated only for the following targets -#----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) - -.PHONY: list -list: - @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs +# ========================================================= +ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS CYGWIN% , $(UNAME))) DESTDIR ?= -# directory variables : GNU conventions prefer lowercase +# directory variables: GNU conventions prefer lowercase # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html # support both lower and uppercase (BSD), use uppercase in script prefix ?= /usr/local PREFIX ?= $(prefix) exec_prefix ?= $(PREFIX) -libdir ?= $(exec_prefix)/lib +EXEC_PREFIX ?= $(exec_prefix) +libdir ?= $(EXEC_PREFIX)/lib LIBDIR ?= $(libdir) includedir ?= $(PREFIX)/include INCLUDEDIR ?= $(includedir) -bindir ?= $(exec_prefix)/bin +bindir ?= $(EXEC_PREFIX)/bin BINDIR ?= $(bindir) datarootdir ?= $(PREFIX)/share mandir ?= $(datarootdir)/man man1dir ?= $(mandir)/man1 -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly)) +PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig +else +PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig +endif + +ifneq (,$(filter $(UNAME),OpenBSD FreeBSD NetBSD DragonFly SunOS)) MANDIR ?= $(PREFIX)/man/man1 else MANDIR ?= $(man1dir) endif -ifneq (,$(filter $(shell uname),SunOS)) +ifneq (,$(filter $(UNAME),SunOS)) INSTALL ?= ginstall else INSTALL ?= install @@ -269,40 +411,84 @@ INSTALL_PROGRAM ?= $(INSTALL) INSTALL_DATA ?= $(INSTALL) -m 644 +PCLIBDIR ?= $(shell echo "$(LIBDIR)" | $(SED) -n $(SED_ERE_OPT) -e "s@^$(EXEC_PREFIX)(/|$$)@@p") +PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | $(SED) -n $(SED_ERE_OPT) -e "s@^$(PREFIX)(/|$$)@@p") +PCEXECDIR?= $(if $(filter $(PREFIX),$(EXEC_PREFIX)),$$\{prefix\},$(EXEC_PREFIX)) + +ifeq (,$(PCLIBDIR)) +# Additional prefix check is required, since the empty string is technically a +# valid PCLIBDIR +ifeq (,$(shell echo "$(LIBDIR)" | $(SED) -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p")) +$(error configured libdir ($(LIBDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file) +endif +endif + +ifeq (,$(PCINCDIR)) +# Additional prefix check is required, since the empty string is technically a +# valid PCINCDIR +ifeq (,$(shell echo "$(INCLUDEDIR)" | $(SED) -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p")) +$(error configured includedir ($(INCLUDEDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file) +endif +endif + +libxxhash.pc: libxxhash.pc.in + @echo creating pkgconfig + $(Q)$(SED) $(SED_ERE_OPT) -e 's|@PREFIX@|$(PREFIX)|' \ + -e 's|@EXECPREFIX@|$(PCEXECDIR)|' \ + -e 's|@LIBDIR@|$(PCLIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + $< > $@ + + .PHONY: install -install: lib xxhsum +install: lib libxxhash.pc xxhsum ## install libraries, CLI, links and man page @echo Installing libxxhash - @$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR) - @$(INSTALL_DATA) libxxhash.a $(DESTDIR)$(LIBDIR) - @$(INSTALL_PROGRAM) $(LIBXXH) $(DESTDIR)$(LIBDIR) - @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) - @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) - @$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR) # includes - @$(INSTALL_DATA) xxhash.h $(DESTDIR)$(INCLUDEDIR) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR) + $(Q)$(INSTALL_DATA) libxxhash.a $(DESTDIR)$(LIBDIR) + $(Q)$(INSTALL_PROGRAM) $(LIBXXH) $(DESTDIR)$(LIBDIR) + $(Q)ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + $(Q)ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR) # includes + $(Q)$(INSTALL_DATA) xxhash.h $(DESTDIR)$(INCLUDEDIR) + $(Q)$(INSTALL_DATA) xxh3.h $(DESTDIR)$(INCLUDEDIR) # for compatibility, will be removed in v0.9.0 +ifeq ($(DISPATCH),1) + $(Q)$(INSTALL_DATA) xxh_x86dispatch.h $(DESTDIR)$(INCLUDEDIR) +endif + @echo Installing pkgconfig + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ + $(Q)$(INSTALL_DATA) libxxhash.pc $(DESTDIR)$(PKGCONFIGDIR)/ @echo Installing xxhsum - @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ - @$(INSTALL_PROGRAM) xxhsum $(DESTDIR)$(BINDIR)/xxhsum - @ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh32sum - @ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ + $(Q)$(INSTALL_PROGRAM) xxhsum $(DESTDIR)$(BINDIR)/xxhsum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh32sum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh128sum @echo Installing man pages - @$(INSTALL_DATA) xxhsum.1 $(DESTDIR)$(MANDIR)/xxhsum.1 - @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh32sum.1 - @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)$(INSTALL_DATA) xxhsum.1 $(DESTDIR)$(MANDIR)/xxhsum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh32sum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh128sum.1 @echo xxhash installation completed .PHONY: uninstall -uninstall: - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.a - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) - @$(RM) $(DESTDIR)$(LIBDIR)/$(LIBXXH) - @$(RM) $(DESTDIR)$(INCLUDEDIR)/xxhash.h - @$(RM) $(DESTDIR)$(BINDIR)/xxh32sum - @$(RM) $(DESTDIR)$(BINDIR)/xxh64sum - @$(RM) $(DESTDIR)$(BINDIR)/xxhsum - @$(RM) $(DESTDIR)$(MANDIR)/xxh32sum.1 - @$(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 - @$(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 +uninstall: ## uninstall libraries, CLI, links and man page + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.a + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/$(LIBXXH) + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxhash.h + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxh3.h + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxh_x86dispatch.h + $(Q)$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libxxhash.pc + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh32sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh128sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxhsum + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh32sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh128sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 @echo xxhsum successfully uninstalled endif diff --git a/deps/xxHash/README.md b/deps/xxHash/README.md index 30318a9f5..01637f499 100644 --- a/deps/xxHash/README.md +++ b/deps/xxHash/README.md @@ -1,8 +1,9 @@ xxHash - Extremely fast hash algorithm ====================================== + xxHash is an Extremely fast Hash algorithm, running at RAM speed limits. -It successfully completes the [SMHasher](http://code.google.com/p/smhasher/wiki/SMHasher) test suite +It successfully completes the [SMHasher](https://code.google.com/p/smhasher/wiki/SMHasher) test suite which evaluates collision, dispersion and randomness qualities of hash functions. Code is highly portable, and hashes are identical on all platforms (little / big endian). @@ -20,19 +21,21 @@ The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows S The reference system uses a Core 2 Duo @3GHz -| Name | Speed | Quality | Author | -|---------------|----------|:-------:|------------------| -| [xxHash] | 5.4 GB/s | 10 | Y.C. | -| MurmurHash 3a | 2.7 GB/s | 10 | Austin Appleby | -| SBox | 1.4 GB/s | 9 | Bret Mulvey | -| Lookup3 | 1.2 GB/s | 9 | Bob Jenkins | -| CityHash64 | 1.05 GB/s| 10 | Pike & Alakuijala| -| FNV | 0.55 GB/s| 5 | Fowler, Noll, Vo | -| CRC32 | 0.43 GB/s| 9 | | -| MD5-32 | 0.33 GB/s| 10 | Ronald L.Rivest | -| SHA1-32 | 0.28 GB/s| 10 | | +| Name | Speed | Quality | Author | +|---------------|--------------------|:-------:|-------------------| +| [xxHash] | 5.4 GB/s | 10 | Y.C. | +| MurmurHash 3a | 2.7 GB/s | 10 | Austin Appleby | +| SBox | 1.4 GB/s | 9 | Bret Mulvey | +| Lookup3 | 1.2 GB/s | 9 | Bob Jenkins | +| CityHash64 | 1.05 GB/s | 10 | Pike & Alakuijala | +| FNV | 0.55 GB/s | 5 | Fowler, Noll, Vo | +| CRC32 | 0.43 GB/s † | 9 | | +| MD5-32 | 0.33 GB/s | 10 | Ronald L.Rivest | +| SHA1-32 | 0.28 GB/s | 10 | | -[xxHash]: http://www.xxhash.com +[xxHash]: https://www.xxhash.com + +Note †: SMHasher's CRC32 implementation is known to be slow. Faster implementations exist. Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. @@ -46,13 +49,13 @@ Note however that 32-bit applications will still run faster using the 32-bit ver SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bit. The reference system uses a Core i5-3340M @2.7GHz -| Version | Speed on 64-bit | Speed on 32-bit | +| Version | Speed on 64-bit | Speed on 32-bit | |------------|------------------|------------------| | XXH64 | 13.8 GB/s | 1.9 GB/s | | XXH32 | 6.8 GB/s | 6.0 GB/s | -This project also includes a command line utility, named `xxhsum`, offering similar features as `md5sum`, -thanks to [Takayuki Matsuoka](https://github.com/t-mat) contributions. +This project also includes a command line utility, named `xxhsum`, offering similar features to `md5sum`, +thanks to [Takayuki Matsuoka](https://github.com/t-mat)'s contributions. ### License @@ -61,86 +64,142 @@ The library files `xxhash.c` and `xxhash.h` are BSD licensed. The utility `xxhsum` is GPL licensed. +### New hash algorithms + +Starting with `v0.7.0`, the library includes a new algorithm named `XXH3`, +which is able to generate 64 and 128-bit hashes. + +The new algorithm is much faster than its predecessors for both long and small inputs, +which can be observed in the following graphs: + +![XXH3, bargraph](https://user-images.githubusercontent.com/750081/61976096-b3a35f00-af9f-11e9-8229-e0afc506c6ec.png) + +![XXH3, latency, random size](https://user-images.githubusercontent.com/750081/61976089-aedeab00-af9f-11e9-9239-e5375d6c080f.png) + +To access these new prototypes, one needs to unlock their declaration, using the build macro `XXH_STATIC_LINKING_ONLY`. + +The algorithm is currently in development, meaning its return values might still change in future versions. +However, the API is stable, and can be used in production, +typically for generation of ephemeral hashes (produced and consumed in same session). + +`XXH3` has now reached "release candidate" status. +If everything remains fine, its format will be "frozen" and become final. +After which, return values of `XXH3` and `XXH128` will no longer change in future versions. +`XXH3`'s return values will be officially finalized upon reaching `v0.8.0`. + + ### Build modifiers -The following macros can be set at compilation time, -they modify xxhash behavior. They are all disabled by default. - -- `XXH_INLINE_ALL` : Make all functions `inline`, with bodies directly included within `xxhash.h`. - There is no need for an `xxhash.o` module in this case. - Inlining functions is generally beneficial for speed on small keys. - It's especially effective when key length is a compile time constant, - with observed performance improvement in the +200% range . - See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details. -- `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer, - xxhash result is the same as a zero-length key - (instead of a dereference segfault). -- `XXH_FORCE_MEMORY_ACCESS` : default method `0` uses a portable `memcpy()` notation. - Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets. - Method `2` forces unaligned reads, which is not standard compliant, but might sometimes be the only way to extract better performance. -- `XXH_CPU_LITTLE_ENDIAN` : by default, endianess is determined at compile time. - It's possible to skip auto-detection and force format to little-endian, by setting this macro to 1. - Setting it to 0 forces big-endian. -- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation. - Breaks consistency with little-endian results. -- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`. - Name underlines that symbols will not be published on library public interface. -- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`. - Useful to evade symbol naming collisions, - in case of multiple inclusions of xxHash source code. - Client applications can still use regular function name, - symbols are automatically translated through `xxhash.h`. -- `XXH_STATIC_LINKING_ONLY` : gives access to state declaration for static allocation. - Incompatible with dynamic linking, due to risks of ABI changes. -- `XXH_NO_LONG_LONG` : removes support for XXH64, - for targets without 64-bit support. +The following macros can be set at compilation time to modify libxxhash's behavior. They are generally disabled by default. + +- `XXH_INLINE_ALL`: Make all functions `inline`, with implementations being directly included within `xxhash.h`. + Inlining functions is beneficial for speed on small keys. + It's _extremely effective_ when key length is expressed as _a compile time constant_, + with performance improvements observed in the +200% range . + See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details. +- `XXH_PRIVATE_API`: same outcome as `XXH_INLINE_ALL`. Still available for legacy support. + The name underlines that `XXH_*` symbols will not be exported. +- `XXH_NAMESPACE`: Prefixes all symbols with the value of `XXH_NAMESPACE`. + This macro can only use compilable character set. + Useful to evade symbol naming collisions, + in case of multiple inclusions of xxHash's source code. + Client applications still use the regular function names, + as symbols are automatically translated through `xxhash.h`. +- `XXH_FORCE_MEMORY_ACCESS`: The default method `0` uses a portable `memcpy()` notation. + Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets. + Method `2` forces unaligned reads, which is not standards compliant, but might sometimes be the only way to extract better read performance. + Method `3` uses a byteshift operation, which is best for old compilers which don't inline `memcpy()` or big-endian systems without a byteswap instruction +- `XXH_FORCE_ALIGN_CHECK`: Use a faster direct read path when input is aligned. + This option can result in dramatic performance improvement when input to hash is aligned on 32 or 64-bit boundaries, + when running on architectures unable to load memory from unaligned addresses, or suffering a performance penalty from it. + It is (slightly) detrimental on platform with good unaligned memory access performance (same instruction for both aligned and unaligned accesses). + This option is automatically disabled on `x86`, `x64` and `aarch64`, and enabled on all other platforms. +- `XXH_VECTOR` : manually select a vector instruction set (default: auto-selected at compilation time). Available instruction sets are `XXH_SCALAR`, `XXH_SSE2`, `XXH_AVX2`, `XXH_AVX512`, `XXH_NEON` and `XXH_VSX`. Compiler may require additional flags to ensure proper support (for example, `gcc` on linux will require `-mavx2` for AVX2, and `-mavx512f` for AVX512). +- `XXH_NO_PREFETCH` : disable prefetching. XXH3 only. +- `XXH_PREFETCH_DIST` : select prefecting distance. XXH3 only. +- `XXH_NO_INLINE_HINTS`: By default, xxHash uses `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size. + Defining this macro to 1 will mark all internal functions as `static`, allowing the compiler to decide whether to inline a function or not. + This is very useful when optimizing for smallest binary size, + and is automatically defined when compiling with `-O0`, `-Os`, `-Oz`, or `-fno-inline` on GCC and Clang. + This may also increase performance depending on compiler and architecture. +- `XXH_REROLL`: Reduces the size of the generated code by not unrolling some loops. + Impact on performance may vary, depending on platform and algorithm. +- `XXH_ACCEPT_NULL_INPUT_POINTER`: if set to `1`, when input is a `NULL` pointer, + xxHash'd result is the same as a zero-length input + (instead of a dereference segfault). + Adds one branch at the beginning of each hash. +- `XXH_STATIC_LINKING_ONLY`: gives access to the state declaration for static allocation. + Incompatible with dynamic linking, due to risks of ABI changes. +- `XXH_NO_LONG_LONG`: removes compilation of algorithms relying on 64-bit types (XXH3 and XXH64). Only XXH32 will be compiled. + Useful for targets (architectures and compilers) without 64-bit support. +- `XXH_IMPORT`: MSVC specific: should only be defined for dynamic linking, as it prevents linkage errors. +- `XXH_CPU_LITTLE_ENDIAN`: By default, endianess is determined by a runtime test resolved at compile time. + If, for some reason, the compiler cannot simplify the runtime test, it can cost performance. + It's possible to skip auto-detection and simply state that the architecture is little-endian by setting this macro to 1. + Setting it to 0 states big-endian. + +For the Command Line Interface `xxhsum`, the following environment variables can also be set : +- `DISPATCH=1` : use `xxh_x86dispatch.c`, to automatically select between `scalar`, `sse2`, `avx2` or `avx512` instruction set at runtime, depending on local host. This option is only valid for `x86`/`x64` systems. + + +### Building xxHash - Using vcpkg + +You can download and install xxHash using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install xxhash + +The xxHash port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. ### Example -Calling xxhash 64-bit variant from a C program : +Calling xxhash 64-bit variant from a C program: -``` +```C #include "xxhash.h" -unsigned long long calcul_hash(const void* buffer, size_t length) -{ - unsigned long long const seed = 0; /* or any other value */ - unsigned long long const hash = XXH64(buffer, length, seed); - return hash; + (...) + XXH64_hash_t hash = XXH64(buffer, size, seed); } ``` -Using streaming variant is more involved, but makes it possible to provide data in multiple rounds : -``` +Using streaming variant is more involved, but makes it possible to provide data incrementally: +```C #include "stdlib.h" /* abort() */ #include "xxhash.h" -unsigned long long calcul_hash_streaming(someCustomType handler) +XXH64_hash_t calcul_hash_streaming(FileHandler fh) { + /* create a hash state */ XXH64_state_t* const state = XXH64_createState(); if (state==NULL) abort(); - size_t const bufferSize = SOME_VALUE; + size_t const bufferSize = SOME_SIZE; void* const buffer = malloc(bufferSize); if (buffer==NULL) abort(); - unsigned long long const seed = 0; /* or any other value */ - XXH_errorcode const resetResult = XXH64_reset(state, seed); - if (resetResult == XXH_ERROR) abort(); + /* Initialize state with selected seed */ + XXH64_hash_t const seed = 0; /* or any other value */ + if (XXH64_reset(state, seed) == XXH_ERROR) abort(); + /* Feed the state with input data, any size, any number of times */ (...) while ( /* any condition */ ) { - size_t const length = get_more_data(buffer, bufferSize, handler); /* undescribed */ - XXH_errorcode const addResult = XXH64_update(state, buffer, length); - if (addResult == XXH_ERROR) abort(); + size_t const length = get_more_data(buffer, bufferSize, fh); + if (XXH64_update(state, buffer, length) == XXH_ERROR) abort(); (...) } - (...) - unsigned long long const hash = XXH64_digest(state); + /* Get the hash */ + XXH64_hash_t const hash = XXH64_digest(state); + + /* State can be re-used; in this example, it is simply freed */ free(buffer); XXH64_freeState(state); @@ -151,10 +210,10 @@ unsigned long long calcul_hash_streaming(someCustomType handler) ### Other programming languages -Beyond the C reference version, -xxHash is also available on many programming languages, -thanks to great contributors. -They are [listed here](http://www.xxhash.com/#other-languages). +Aside from the C reference version, +xxHash is also available in many different programming languages, +thanks to many great contributors. +They are [listed here](https://www.xxhash.com/#other-languages). ### Branch Policy diff --git a/deps/xxHash/appveyor.yml b/deps/xxHash/appveyor.yml index aa712227d..850f48b14 100644 --- a/deps/xxHash/appveyor.yml +++ b/deps/xxHash/appveyor.yml @@ -1,13 +1,44 @@ +#---------------------------------# +# general configuration # +#---------------------------------# version: 1.0.{build} +max_jobs: 2 + +#---------------------------------# +# environment configuration # +#---------------------------------# +clone_depth: 2 environment: matrix: + - COMPILER: "visual" + ARCH: "x64" + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "x64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "Win32" + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "Win32" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013 + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "ARM" + - COMPILER: "visual" + ARCH: "ARM64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + # note: ARM64 is not available with Visual Studio 14 2015, which is default for Appveyor - COMPILER: "gcc" PLATFORM: "mingw64" - COMPILER: "gcc" PLATFORM: "mingw32" + - COMPILER: "gcc" + PLATFORM: "clang" install: - - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION% + - ECHO Installing %COMPILER% %PLATFORM% %ARCH% - MKDIR bin - if [%COMPILER%]==[gcc] SET PATH_ORIGINAL=%PATH% - if [%COMPILER%]==[gcc] ( @@ -15,56 +46,66 @@ install: SET "PATH_MINGW64=c:\msys64\mingw64\bin;c:\msys64\usr\bin" && COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe && COPY C:\MinGW\bin\gcc.exe C:\MinGW\bin\cc.exe - ) else ( - IF [%PLATFORM%]==[x64] (SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;") ) +#---------------------------------# +# build configuration # +#---------------------------------# build_script: - if [%PLATFORM%]==[mingw32] SET PATH=%PATH_MINGW32%;%PATH_ORIGINAL% - if [%PLATFORM%]==[mingw64] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% - - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% - - ECHO *** && - ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION% && - ECHO *** - - if [%PLATFORM%]==[clang] (clang -v) - - if [%COMPILER%]==[gcc] (gcc -v) + - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% + - ECHO *** + - ECHO Building %COMPILER% %PLATFORM% %ARCH% + - ECHO *** + + - if [%COMPILER%]==[gcc] ( + if [%PLATFORM%]==[clang] ( + clang -v + ) ELSE ( + gcc -v + ) + ) - if [%COMPILER%]==[gcc] ( echo ----- && make -v && echo ----- && if not [%PLATFORM%]==[clang] ( + if [%PLATFORM%]==[mingw32] ( SET CPPFLAGS=-DPOOL_MT=0 ) && make -B clean test MOREFLAGS=-Werror ) ELSE ( - make -B clean test CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" - ) + SET CXXFLAGS=--std=c++14 && + make -B clean test CC=clang CXX=clang++ MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wno-pass-failed" NO_C90_TEST=true + ) && + make -C tests/bench ) + # note 1: strict c90 tests with clang fail, due to (erroneous) presence on `inline` keyword in some included system file + # note 2: multi-threading code doesn't work with mingw32, disabled through POOL_MT=0 + # note 3: clang requires C++14 to compile sort because its own code contains c++14-only code + - if [%COMPILER%]==[visual] ( - ECHO *** && - ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\ + cd cmake_unofficial && + cmake . -DCMAKE_BUILD_TYPE=Release -A %ARCH% -DXXHASH_C_FLAGS="/WX" && + cmake --build . --config Release ) +#---------------------------------# +# tests configuration # +#---------------------------------# test_script: - - ECHO *** && - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% && - ECHO *** - - if not [%COMPILER%]==[unknown] ( - xxhsum -h && - xxhsum xxhsum.exe && - xxhsum -bi1 && - echo ------- xxhsum tested ------- + # note: can only run x86 and x64 binaries on Appveyor + # note: if %COMPILER%==gcc, xxhsum was already tested within `make test` + - if [%TEST_XXHSUM%]==[true] ( + ECHO *** && + ECHO Testing %COMPILER% %PLATFORM% %ARCH% && + ECHO *** && + cd Release && + xxhsum.exe -bi1 && + ECHO ------- xxhsum tested ------- ) + + +#---------------------------------# +# artifacts configuration # +#---------------------------------# +# none yet diff --git a/deps/xxHash/cmake_unofficial/.gitignore b/deps/xxHash/cmake_unofficial/.gitignore index bdb2593d4..93d9fe4f6 100644 --- a/deps/xxHash/cmake_unofficial/.gitignore +++ b/deps/xxHash/cmake_unofficial/.gitignore @@ -8,7 +8,5 @@ cmake_install.cmake # make compilation results -libxxhash.0.6.3.dylib -libxxhash.0.dylib -libxxhash.a -libxxhash.dylib +*.dylib +*.a diff --git a/deps/xxHash/cmake_unofficial/CMakeLists.txt b/deps/xxHash/cmake_unofficial/CMakeLists.txt index 1ca7a06d4..d9a8636ff 100644 --- a/deps/xxHash/cmake_unofficial/CMakeLists.txt +++ b/deps/xxHash/cmake_unofficial/CMakeLists.txt @@ -3,7 +3,9 @@ # the public domain worldwide. This software is distributed without # any warranty. # -# For details, see . +# For details, see . + +cmake_minimum_required (VERSION 2.8.12 FATAL_ERROR) set(XXHASH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..") @@ -18,33 +20,44 @@ set(XXHASH_LIB_VERSION ${XXHASH_VERSION_STRING}) set(XXHASH_LIB_SOVERSION "${XXHASH_VERSION_MAJOR}") mark_as_advanced(XXHASH_VERSION_MAJOR XXHASH_VERSION_MINOR XXHASH_VERSION_RELEASE XXHASH_VERSION_STRING XXHASH_LIB_VERSION XXHASH_LIB_SOVERSION) -option(BUILD_XXHSUM "Build the xxhsum binary" ON) -option(BUILD_SHARED_LIBS "Build shared library" ON) - +if("${CMAKE_VERSION}" VERSION_LESS "3.13") + #message(WARNING "CMake ${CMAKE_VERSION} has no CMP0077 policy: options will erase uncached/untyped normal vars!") +else() + cmake_policy (SET CMP0077 NEW) +endif() if("${CMAKE_VERSION}" VERSION_LESS "3.0") - project(XXHASH C) + project(xxHash C) else() - cmake_policy (SET CMP0048 NEW) - project(XXHASH - VERSION ${XXHASH_VERSION_STRING} - LANGUAGES C) + cmake_policy (SET CMP0048 NEW) + project(xxHash + VERSION ${XXHASH_VERSION_STRING} + LANGUAGES C) +endif() + +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Project build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE + PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "MinSizeRel") +endif() +if(NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "xxHash build type: ${CMAKE_BUILD_TYPE}") endif() -cmake_minimum_required (VERSION 2.8.12) +option(BUILD_SHARED_LIBS "Build shared library" ON) +set(XXHASH_BUILD_XXHSUM ON CACHE BOOL "Build the xxhsum binary") # If XXHASH is being bundled in another project, we don't want to # install anything. However, we want to let people override this, so # we'll use the XXHASH_BUNDLED_MODE variable to let them do that; just -# set it to OFF in your project before you add_subdirectory(xxhash/contrib/cmake_unofficial). -if(CMAKE_CURRENT_SOURCE_DIR STREQUAL "${CMAKE_SOURCE_DIR}") - # Bundled mode hasn't been set one way or the other, set the default - # depending on whether or not we are the top-level project. - if("${XXHASH_PARENT_DIRECTORY}" STREQUAL "") +# set it to OFF in your project before you add_subdirectory(xxhash/cmake_unofficial). +if(NOT DEFINED XXHASH_BUNDLED_MODE) + if("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") set(XXHASH_BUNDLED_MODE OFF) else() set(XXHASH_BUNDLED_MODE ON) endif() endif() +set(XXHASH_BUNDLED_MODE ${XXHASH_BUNDLED_MODE} CACHE BOOL "" FORCE) mark_as_advanced(XXHASH_BUNDLED_MODE) # Allow people to choose whether to build shared or static libraries @@ -53,20 +66,35 @@ mark_as_advanced(XXHASH_BUNDLED_MODE) include(CMakeDependentOption) CMAKE_DEPENDENT_OPTION(BUILD_SHARED_LIBS "Build shared libraries" ON "NOT XXHASH_BUNDLED_MODE" OFF) -include_directories("${XXHASH_DIR}") - # libxxhash add_library(xxhash "${XXHASH_DIR}/xxhash.c") +add_library(${PROJECT_NAME}::xxhash ALIAS xxhash) + +target_include_directories(xxhash + PUBLIC + $ + $) +if (BUILD_SHARED_LIBS) + target_compile_definitions(xxhash PUBLIC XXH_EXPORT) +endif () set_target_properties(xxhash PROPERTIES SOVERSION "${XXHASH_VERSION_STRING}" VERSION "${XXHASH_VERSION_STRING}") -# xxhsum -add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c") -target_link_libraries(xxhsum xxhash) +if(XXHASH_BUILD_XXHSUM) + # xxhsum + add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c") + add_executable(${PROJECT_NAME}::xxhsum ALIAS xxhsum) + + target_link_libraries(xxhsum PRIVATE xxhash) + target_include_directories(xxhsum PRIVATE "${XXHASH_DIR}") +endif(XXHASH_BUILD_XXHSUM) # Extra warning flags include (CheckCCompilerFlag) +if (XXHASH_C_FLAGS) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XXHASH_C_FLAGS}") +endif() foreach (flag -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement @@ -88,13 +116,53 @@ endforeach (flag) if(NOT XXHASH_BUNDLED_MODE) include(GNUInstallDirs) - install(TARGETS xxhsum - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS xxhash + EXPORT xxHashTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") install(FILES "${XXHASH_DIR}/xxhash.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") - install(FILES "${XXHASH_DIR}/xxhsum.1" - DESTINATION "${CMAKE_INSTALL_MANDIR}/man1") + install(FILES "${XXHASH_DIR}/xxh3.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + if(XXHASH_BUILD_XXHSUM) + install(TARGETS xxhsum + EXPORT xxHashTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + install(FILES "${XXHASH_DIR}/xxhsum.1" + DESTINATION "${CMAKE_INSTALL_MANDIR}/man1") + endif(XXHASH_BUILD_XXHSUM) + + include(CMakePackageConfigHelpers) + + set(xxHash_VERSION_CONFIG "${PROJECT_BINARY_DIR}/xxHashConfigVersion.cmake") + set(xxHash_PROJECT_CONFIG "${PROJECT_BINARY_DIR}/xxHashConfig.cmake") + set(xxHash_TARGETS_CONFIG "${PROJECT_BINARY_DIR}/xxHashTargets.cmake") + set(xxHash_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/xxHash/") + write_basic_package_version_file(${xxHash_VERSION_CONFIG} + VERSION ${XXHASH_VERSION_STRING} + COMPATIBILITY AnyNewerVersion) + configure_package_config_file( + ${PROJECT_SOURCE_DIR}/xxHashConfig.cmake.in + ${xxHash_PROJECT_CONFIG} + INSTALL_DESTINATION ${xxHash_CONFIG_INSTALL_DIR}) + if("${CMAKE_VERSION}" VERSION_LESS "3.0") + set(XXHASH_EXPORT_SET xxhash) + if(XXHASH_BUILD_XXHSUM) + set(XXHASH_EXPORT_SET ${XXHASH_EXPORT_SET} xxhsum) + endif() + export(TARGETS ${XXHASH_EXPORT_SET} + FILE ${xxHash_TARGETS_CONFIG} + NAMESPACE ${PROJECT_NAME}::) + else() + export(EXPORT xxHashTargets + FILE ${xxHash_TARGETS_CONFIG} + NAMESPACE ${PROJECT_NAME}::) + endif() + + install(FILES ${xxHash_PROJECT_CONFIG} ${xxHash_VERSION_CONFIG} + DESTINATION ${xxHash_CONFIG_INSTALL_DIR}) + install(EXPORT xxHashTargets + DESTINATION ${xxHash_CONFIG_INSTALL_DIR} + NAMESPACE ${PROJECT_NAME}::) endif(NOT XXHASH_BUNDLED_MODE) diff --git a/deps/xxHash/cmake_unofficial/README.md b/deps/xxHash/cmake_unofficial/README.md index 4fca58dda..554c55a2e 100644 --- a/deps/xxHash/cmake_unofficial/README.md +++ b/deps/xxHash/cmake_unofficial/README.md @@ -1,6 +1,36 @@ +## Usage -The `cmake` script present in this directory offers the following options : +### Way 1: import targets +Build xxHash targets: + + cd + mkdir build + cd build + cmake ../cmake_unofficial [options] + cmake --build . + cmake --build . --target install #optional + +Where possible options are: +- `-DXXHASH_BUILD_ENABLE_INLINE_API=`: adds xxhash.c for the `-DXXH_INLINE_ALL` api. ON by default. +- `-DXXHASH_BUILD_XXHSUM=`: build the command line binary. ON by default +- `-DBUILD_SHARED_LIBS=`: build dynamic library. ON by default. +- `-DCMAKE_INSTALL_PREFIX=`: use custom install prefix path. + +Add lines into downstream CMakeLists.txt: + + find_package(xxHash 0.7 CONFIG REQUIRED) + ... + target_link_libraries(MyTarget PRIVATE xxHash::xxhash) + +### Way 2: Add subdirectory +Add lines into downstream CMakeLists.txt: + + option(BUILD_SHARE_LIBS "Build shared libs" OFF) #optional + ... + set(XXHASH_BUILD_ENABLE_INLINE_API OFF) #optional + set(XXHASH_BUILD_XXHSUM OFF) #optional + add_subdirectory( EXCLUDE_FROM_ALL) + ... + target_link_libraries(MyTarget PRIVATE xxHash::xxhash) -- `BUILD_XXHSUM` : build the command line binary. ON by default -- `BUILD_SHARED_LIBS` : build dynamic library. ON by default. diff --git a/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in b/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in new file mode 100644 index 000000000..fd282bee6 --- /dev/null +++ b/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in @@ -0,0 +1,4 @@ +@PACKAGE_INIT@ + +include(${CMAKE_CURRENT_LIST_DIR}/xxHashTargets.cmake) + diff --git a/deps/xxHash/doc/README.md b/deps/xxHash/doc/README.md new file mode 100644 index 000000000..a73ad7290 --- /dev/null +++ b/deps/xxHash/doc/README.md @@ -0,0 +1,9 @@ +xxHash Specification +======================= + +This directory contains material defining the xxHash algorithm. +It's described in [this specification document](xxhash_spec.md). + +The algorithm is also be illustrated by a [simple educational library](https://github.com/easyaspi314/xxhash-clean), +written by @easyaspi314 and designed for readability +(as opposed to the reference library which is designed for speed). diff --git a/deps/xxHash/doc/xxhash.cry b/deps/xxHash/doc/xxhash.cry new file mode 100644 index 000000000..984e1c8b6 --- /dev/null +++ b/deps/xxHash/doc/xxhash.cry @@ -0,0 +1,206 @@ +module xxhash where + +/** + * The 32-bit variant of xxHash. The first argument is the sequence + * of L bytes to hash. The second argument is a seed value. + */ +XXH32 : {L} (fin L) => [L][8] -> [32] -> [32] +XXH32 input seed = XXH32_avalanche acc1 + where (stripes16 # stripes4 # stripes1) = input + accR = foldl XXH32_rounds (XXH32_init seed) (split stripes16 : [L/16][16][8]) + accL = `(L % 2^^32) + if (`L:Integer) < 16 + then seed + PRIME32_5 + else XXH32_converge accR + acc4 = foldl XXH32_digest4 accL (split stripes4 : [(L%16)/4][4][8]) + acc1 = foldl XXH32_digest1 acc4 (stripes1 : [L%4][8]) + +/** + * The 64-bit variant of xxHash. The first argument is the sequence + * of L bytes to hash. The second argument is a seed value. + */ +XXH64 : {L} (fin L) => [L][8] -> [64] -> [64] +XXH64 input seed = XXH64_avalanche acc1 + where (stripes32 # stripes8 # stripes4 # stripes1) = input + accR = foldl XXH64_rounds (XXH64_init seed) (split stripes32 : [L/32][32][8]) + accL = `(L % 2^^64) + if (`L:Integer) < 32 + then seed + PRIME64_5 + else XXH64_converge accR + acc8 = foldl XXH64_digest8 accL (split stripes8 : [(L%32)/8][8][8]) + acc4 = foldl XXH64_digest4 acc8 (split stripes4 : [(L%8)/4][4][8]) + acc1 = foldl XXH64_digest1 acc4 (stripes1 : [L%4][8]) + +private + + //Utility functions + + /** + * Combines a sequence of bytes into a word using the little-endian + * convention. + */ + toLE bytes = join (reverse bytes) + + //32-bit xxHash helper functions + + //32-bit prime number constants + PRIME32_1 = 0x9E3779B1 : [32] + PRIME32_2 = 0x85EBCA77 : [32] + PRIME32_3 = 0xC2B2AE3D : [32] + PRIME32_4 = 0x27D4EB2F : [32] + PRIME32_5 = 0x165667B1 : [32] + + /** + * The property shows that the hexadecimal representation of the + * PRIME32 constants is the same as the binary representation. + */ + property PRIME32s_as_bits_correct = + (PRIME32_1 == 0b10011110001101110111100110110001) /\ + (PRIME32_2 == 0b10000101111010111100101001110111) /\ + (PRIME32_3 == 0b11000010101100101010111000111101) /\ + (PRIME32_4 == 0b00100111110101001110101100101111) /\ + (PRIME32_5 == 0b00010110010101100110011110110001) + + /** + * This function initializes the four internal accumulators of XXH32. + */ + XXH32_init : [32] -> [4][32] + XXH32_init seed = [acc1, acc2, acc3, acc4] + where acc1 = seed + PRIME32_1 + PRIME32_2 + acc2 = seed + PRIME32_2 + acc3 = seed + 0 + acc4 = seed - PRIME32_1 + + /** + * This processes a single lane of the main round function of XXH32. + */ + XXH32_round : [32] -> [32] -> [32] + XXH32_round accN laneN = ((accN + laneN * PRIME32_2) <<< 13) * PRIME32_1 + + /** + * This is the main round function of XXH32 and processes a stripe, + * i.e. 4 lanes with 4 bytes each. + */ + XXH32_rounds : [4][32] -> [16][8] -> [4][32] + XXH32_rounds accs stripe = + [ XXH32_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] + + /** + * This function combines the four lane accumulators into a single + * 32-bit value. + */ + XXH32_converge : [4][32] -> [32] + XXH32_converge [acc1, acc2, acc3, acc4] = + (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18) + + /** + * This function digests a four byte lane + */ + XXH32_digest4 : [32] -> [4][8] -> [32] + XXH32_digest4 acc lane = ((acc + toLE lane * PRIME32_3) <<< 17) * PRIME32_4 + + /** + * This function digests a single byte lane + */ + XXH32_digest1 : [32] -> [8] -> [32] + XXH32_digest1 acc lane = ((acc + (0 # lane) * PRIME32_5) <<< 11) * PRIME32_1 + + /** + * This function ensures that all input bits have a chance to impact + * any bit in the output digest, resulting in an unbiased + * distribution. + */ + XXH32_avalanche : [32] -> [32] + XXH32_avalanche acc0 = acc5 + where acc1 = acc0 ^ (acc0 >> 15) + acc2 = acc1 * PRIME32_2 + acc3 = acc2 ^ (acc2 >> 13) + acc4 = acc3 * PRIME32_3 + acc5 = acc4 ^ (acc4 >> 16) + + //64-bit xxHash helper functions + + //64-bit prime number constants + PRIME64_1 = 0x9E3779B185EBCA87 : [64] + PRIME64_2 = 0xC2B2AE3D27D4EB4F : [64] + PRIME64_3 = 0x165667B19E3779F9 : [64] + PRIME64_4 = 0x85EBCA77C2B2AE63 : [64] + PRIME64_5 = 0x27D4EB2F165667C5 : [64] + + /** + * The property shows that the hexadecimal representation of the + * PRIME64 constants is the same as the binary representation. + */ + property PRIME64s_as_bits_correct = + (PRIME64_1 == 0b1001111000110111011110011011000110000101111010111100101010000111) /\ + (PRIME64_2 == 0b1100001010110010101011100011110100100111110101001110101101001111) /\ + (PRIME64_3 == 0b0001011001010110011001111011000110011110001101110111100111111001) /\ + (PRIME64_4 == 0b1000010111101011110010100111011111000010101100101010111001100011) /\ + (PRIME64_5 == 0b0010011111010100111010110010111100010110010101100110011111000101) + + /** + * This function initializes the four internal accumulators of XXH64. + */ + XXH64_init : [64] -> [4][64] + XXH64_init seed = [acc1, acc2, acc3, acc4] + where acc1 = seed + PRIME64_1 + PRIME64_2 + acc2 = seed + PRIME64_2 + acc3 = seed + 0 + acc4 = seed - PRIME64_1 + + /** + * This processes a single lane of the main round function of XXH64. + */ + XXH64_round : [64] -> [64] -> [64] + XXH64_round accN laneN = ((accN + laneN * PRIME64_2) <<< 31) * PRIME64_1 + + /** + * This is the main round function of XXH64 and processes a stripe, + * i.e. 4 lanes with 8 bytes each. + */ + XXH64_rounds : [4][64] -> [32][8] -> [4][64] + XXH64_rounds accs stripe = + [ XXH64_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] + + /** + * This is a helper function, used to merge the four lane accumulators. + */ + mergeAccumulator : [64] -> [64] -> [64] + mergeAccumulator acc accN = (acc ^ XXH64_round 0 accN) * PRIME64_1 + PRIME64_4 + + /** + * This function combines the four lane accumulators into a single + * 64-bit value. + */ + XXH64_converge : [4][64] -> [64] + XXH64_converge [acc1, acc2, acc3, acc4] = + foldl mergeAccumulator ((acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)) [acc1, acc2, acc3, acc4] + + /** + * This function digests an eight byte lane + */ + XXH64_digest8 : [64] -> [8][8] -> [64] + XXH64_digest8 acc lane = ((acc ^ XXH64_round 0 (toLE lane)) <<< 27) * PRIME64_1 + PRIME64_4 + + /** + * This function digests a four byte lane + */ + XXH64_digest4 : [64] -> [4][8] -> [64] + XXH64_digest4 acc lane = ((acc ^ (0 # toLE lane) * PRIME64_1) <<< 23) * PRIME64_2 + PRIME64_3 + + /** + * This function digests a single byte lane + */ + XXH64_digest1 : [64] -> [8] -> [64] + XXH64_digest1 acc lane = ((acc ^ (0 # lane) * PRIME64_5) <<< 11) * PRIME64_1 + + /** + * This function ensures that all input bits have a chance to impact + * any bit in the output digest, resulting in an unbiased + * distribution. + */ + XXH64_avalanche : [64] -> [64] + XXH64_avalanche acc0 = acc5 + where acc1 = acc0 ^ (acc0 >> 33) + acc2 = acc1 * PRIME64_2 + acc3 = acc2 ^ (acc2 >> 29) + acc4 = acc3 * PRIME64_3 + acc5 = acc4 ^ (acc4 >> 32) diff --git a/deps/xxHash/doc/xxhash_spec.md b/deps/xxHash/doc/xxhash_spec.md index e673334b4..cd593d4ec 100644 --- a/deps/xxHash/doc/xxhash_spec.md +++ b/deps/xxHash/doc/xxhash_spec.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.1.0 (15/01/18) +0.1.1 (10/10/18) Table of Contents @@ -31,25 +31,25 @@ Table of Contents Introduction ---------------- -This document describes the xxHash digest algorithm, for both 32 and 64 variants, named `XXH32` and `XXH64`. The algorithm takes as input a message of arbitrary length and an optional seed value, it then produces an output of 32 or 64-bit as "fingerprint" or "digest". +This document describes the xxHash digest algorithm for both 32-bit and 64-bit variants, named `XXH32` and `XXH64`. The algorithm takes an input a message of arbitrary length and an optional seed value, then produces an output of 32 or 64-bit as "fingerprint" or "digest". -xxHash is primarily designed for speed. It is labelled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with predefined digest. +xxHash is primarily designed for speed. It is labeled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with a predefined digest. -XXH32 is designed to be fast on 32-bits machines. -XXH64 is designed to be fast on 64-bits machines. +XXH32 is designed to be fast on 32-bit machines. +XXH64 is designed to be fast on 64-bit machines. Both variants produce different output. -However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical whatever the endianness and width of the cpu. +However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical whatever the endianness and width of the cpu is. ### Operation notations All operations are performed modulo {32,64} bits. Arithmetic overflows are expected. `XXH32` uses 32-bit modular operations. `XXH64` uses 64-bit modular operations. -- `+` : denote modular addition -- `*` : denote modular multiplication -- `X <<< s` : denote the value obtained by circularly shifting (rotating) `X` left by `s` bit positions. -- `X >> s` : denote the value obtained by shifting `X` right by s bit positions. Upper `s` bits become `0`. -- `X xor Y` : denote the bit-wise XOR of `X` and `Y` (same width). +- `+`: denotes modular addition +- `*`: denotes modular multiplication +- `X <<< s`: denotes the value obtained by circularly shifting (rotating) `X` left by `s` bit positions. +- `X >> s`: denotes the value obtained by shifting `X` right by s bit positions. Upper `s` bits become `0`. +- `X xor Y`: denotes the bit-wise XOR of `X` and `Y` (same width). XXH32 Algorithm Description @@ -61,15 +61,17 @@ We begin by supposing that we have a message of any length `L` as input, and tha The algorithm collect and transform input in _stripes_ of 16 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 32-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. -The algorithm uses 32-bits addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bits prime number constants, all defined below : +The algorithm uses 32-bits addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bits prime number constants, all defined below: - static const u32 PRIME32_1 = 2654435761U; - static const u32 PRIME32_2 = 2246822519U; - static const u32 PRIME32_3 = 3266489917U; - static const u32 PRIME32_4 = 668265263U; - static const u32 PRIME32_5 = 374761393U; + static const u32 PRIME32_1 = 0x9E3779B1U; // 0b10011110001101110111100110110001 + static const u32 PRIME32_2 = 0x85EBCA77U; // 0b10000101111010111100101001110111 + static const u32 PRIME32_3 = 0xC2B2AE3DU; // 0b11000010101100101010111000111101 + static const u32 PRIME32_4 = 0x27D4EB2FU; // 0b00100111110101001110101100101111 + static const u32 PRIME32_5 = 0x165667B1U; // 0b00010110010101100110011110110001 -### Step 1. Initialise internal accumulators +These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities. + +### Step 1. Initialize internal accumulators Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`. @@ -78,11 +80,11 @@ Each accumulator gets an initial value based on optional `seed` input. Since the u32 acc3 = seed + 0; u32 acc4 = seed - PRIME32_1; -#### Special case : input is less than 16 bytes +#### Special case: input is less than 16 bytes -When input is too small (< 16 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. +When the input is too small (< 16 bytes), the algorithm will not process any stripes. Consequently, it will not make use of parallel accumulators. -In which case, a simplified initialization is performed, using a single accumulator : +In this case, a simplified initialization is performed, using a single accumulator: u32 acc = seed + PRIME32_5; @@ -96,7 +98,7 @@ The first lane is used to update accumulator 1, the second lane is used to updat Each lane read its associated 32-bit value using __little-endian__ convention. -For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula: accN = accN + (laneN * PRIME32_2); accN = accN <<< 13; @@ -104,12 +106,12 @@ For each {lane, accumulator}, the update process is called a _round_, and applie This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^32. -Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 16 bytes). +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except for the last remaining bytes which cannot form a stripe (< 16 bytes). When that happens, move to step 3. ### Step 3. Accumulator convergence -All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (32-bit). The associated formula is as follows : +All 4 lane accumulators from the previous steps are merged to produce a single remaining accumulator of the same width (32-bit). The associated formula is as follows: acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); @@ -124,7 +126,7 @@ Note that, if input length is so large that it requires more than 32-bits, only ### Step 5. Consume remaining input There may be up to 15 bytes remaining to consume from the input. -The final stage will digest them according to following pseudo-code : +The final stage will digest them according to following pseudo-code: while (remainingLength >= 4) { lane = read_32bit_little_endian(input_ptr); @@ -164,17 +166,19 @@ XXH64 Algorithm Description ### Overview -`XXH64` algorithm structure is very similar to `XXH32` one. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations. +`XXH64`'s algorithm structure is very similar to `XXH32` one. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations. The algorithm collects and transforms input in _stripes_ of 32 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 64-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. -The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below : +The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below: + + static const u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; // 0b1001111000110111011110011011000110000101111010111100101010000111 + static const u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; // 0b1100001010110010101011100011110100100111110101001110101101001111 + static const u64 PRIME64_3 = 0x165667B19E3779F9ULL; // 0b0001011001010110011001111011000110011110001101110111100111111001 + static const u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; // 0b1000010111101011110010100111011111000010101100101010111001100011 + static const u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; // 0b0010011111010100111010110010111100010110010101100110011111000101 - static const u64 PRIME64_1 = 11400714785074694791ULL; - static const u64 PRIME64_2 = 14029467366897019727ULL; - static const u64 PRIME64_3 = 1609587929392839161ULL; - static const u64 PRIME64_4 = 9650029242287828579ULL; - static const u64 PRIME64_5 = 2870177450012600261ULL; +These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities. ### Step 1. Initialise internal accumulators @@ -185,11 +189,11 @@ Each accumulator gets an initial value based on optional `seed` input. Since the u64 acc3 = seed + 0; u64 acc4 = seed - PRIME64_1; -#### Special case : input is less than 32 bytes +#### Special case: input is less than 32 bytes -When input is too small (< 32 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. +When the input is too small (< 32 bytes), the algorithm will not process any stripes. Consequently, it will not make use of parallel accumulators. -In which case, a simplified initialization is performed, using a single accumulator : +In this case, a simplified initialization is performed, using a single accumulator: u64 acc = seed + PRIME64_5; @@ -203,7 +207,7 @@ The first lane is used to update accumulator 1, the second lane is used to updat Each lane read its associated 64-bit value using __little-endian__ convention. -For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula: round(accN,laneN): accN = accN + (laneN * PRIME64_2); @@ -212,21 +216,21 @@ For each {lane, accumulator}, the update process is called a _round_, and applie This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^64. -Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 32 bytes). +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except for the last remaining bytes which cannot form a stripe (< 32 bytes). When that happens, move to step 3. ### Step 3. Accumulator convergence All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (64-bit). The associated formula is as follows. -Note that accumulator convergence is more complex than 32-bit variant, and requires to define another function called _mergeAccumulator()_ : +Note that accumulator convergence is more complex than 32-bit variant, and requires to define another function called _mergeAccumulator()_: mergeAccumulator(acc,accN): acc = acc xor round(0, accN); acc = acc * PRIME64_1 return acc + PRIME64_4; -which is then used in the convergence formula : +which is then used in the convergence formula: acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); acc = mergeAccumulator(acc, acc1); @@ -243,7 +247,7 @@ The input total length is presumed known at this stage. This step is just about ### Step 5. Consume remaining input There may be up to 31 bytes remaining to consume from the input. -The final stage will digest them according to following pseudo-code : +The final stage will digest them according to following pseudo-code: while (remainingLength >= 8) { lane = read_64bit_little_endian(input_ptr); @@ -295,17 +299,19 @@ The algorithm allows input to be streamed and processed in multiple steps. In su On 64-bit systems, the 64-bit variant `XXH64` is generally faster to compute, so it is a recommended variant, even when only 32-bit are needed. -On 32-bit systems though, positions are reversed : `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes a faster variant. +On 32-bit systems though, positions are reversed: `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes a faster variant. Reference Implementation ---------------------------------------- -A reference library written in C is available at http://www.xxhash.com . +A reference library written in C is available at https://www.xxhash.com. The web page also links to multiple other implementations written in many different languages. It links to the [github project page](https://github.com/Cyan4973/xxHash) where an [issue board](https://github.com/Cyan4973/xxHash/issues) can be used for further public discussions on the topic. Version changes -------------------- -v0.1.0 : initial release +v0.7.3: Minor fixes +v0.1.1: added a note on rationale for selection of constants +v0.1.0: initial release diff --git a/deps/xxHash/libxxhash.pc.in b/deps/xxHash/libxxhash.pc.in new file mode 100644 index 000000000..0a52dde20 --- /dev/null +++ b/deps/xxHash/libxxhash.pc.in @@ -0,0 +1,15 @@ +# xxHash - Extremely fast hash algorithm +# Copyright (C) 2012-2020, Yann Collet, Facebook +# BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + +prefix=@PREFIX@ +exec_prefix=@EXECPREFIX@ +includedir=${prefix}/@INCLUDEDIR@ +libdir=${exec_prefix}/@LIBDIR@ + +Name: xxhash +Description: extremely fast hash algorithm +URL: https://www.xxhash.com/ +Version: @VERSION@ +Libs: -L${libdir} -lxxhash +Cflags: -I${includedir} diff --git a/deps/xxHash/tests/Makefile b/deps/xxHash/tests/Makefile new file mode 100644 index 000000000..361032fde --- /dev/null +++ b/deps/xxHash/tests/Makefile @@ -0,0 +1,83 @@ +CFLAGS += -Wall -Wextra -Wundef -g + +NM = nm +GREP = grep + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +else +EXT = +endif + +ifneq (,$(filter %UTF-8,$(LANG))) +ENABLE_UNICODE ?= 1 +else +ENABLE_UNICODE ?= 0 +endif + +.PHONY: default +default: all + +.PHONY: all +all: test + +.PHONY: test +test: test_multiInclude test_unicode + +.PHONY: test_multiInclude +test_multiInclude: + @$(MAKE) clean + # compile without xxhash.o, ensure symbols exist within target + # Note: built using only default rules + $(MAKE) multiInclude + @$(MAKE) clean + # compile with xxhash.o, to detect duplicated symbols + $(MAKE) multiInclude_withxxhash + @$(MAKE) clean + # Note: XXH_INLINE_ALL with XXH_NAMESPACE is currently disabled + # compile with XXH_NAMESPACE + # CPPFLAGS=-DXXH_NAMESPACE=TESTN_ $(MAKE) multiInclude_withxxhash + # no symbol prefixed TESTN_ should exist + # ! $(NM) multiInclude_withxxhash | $(GREP) TESTN_ + #$(MAKE) clean + # compile with XXH_NAMESPACE and without xxhash.o + # CPPFLAGS=-DXXH_NAMESPACE=TESTN_ $(MAKE) multiInclude + # no symbol prefixed TESTN_ should exist + # ! $(NM) multiInclude | $(GREP) TESTN_ + #@$(MAKE) clean + +.PHONY: test_ppc_redefine +test_ppc_redefine: ppc_define.c + @$(MAKE) clean + $(CC) $(CPPFLAGS) $(CFLAGS) -c $^ + +xxhsum$(EXT): ../xxhash.c ../xxhash.h ../xxhsum.c + $(CC) $(CFLAGS) $(LDFLAGS) ../xxhash.c ../xxhsum.c -o $@ + +# Make sure that Unicode filenames work. +# https://github.com/Cyan4973/xxHash/issues/293 +.PHONY: test_unicode +ifeq (0,$(ENABLE_UNICODE)) +test_unicode: + @echo "Skipping Unicode test, your terminal doesn't appear to support UTF-8." + @echo "Try with ENABLE_UNICODE=1" +else +test_unicode: xxhsum$(EXT) generate_unicode_test.c + # Generate a Unicode filename test dynamically + # to keep UTF-8 out of the source tree. + $(CC) $(CFLAGS) $(LDFLAGS) generate_unicode_test.c -o generate_unicode_test$(EXT) + ./generate_unicode_test$(EXT) + $(SHELL) ./unicode_test.sh +endif + +xxhash.o: ../xxhash.c ../xxhash.h + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -c -o $@ $< + +multiInclude_withxxhash: multiInclude.o xxhash.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $^ + +clean: + @$(RM) *.o + @$(RM) multiInclude multiInclude_withxxhash + @$(RM) *.unicode generate_unicode_test$(EXT) unicode_test.* xxhsum$(EXT) diff --git a/deps/xxHash/tests/bench/.gitignore b/deps/xxHash/tests/bench/.gitignore new file mode 100644 index 000000000..ede2d5880 --- /dev/null +++ b/deps/xxHash/tests/bench/.gitignore @@ -0,0 +1,11 @@ +# build artifacts + +*.o +benchHash +benchHash32 +benchHash_avx2 +benchHash_hw + +# test files + +test* diff --git a/deps/xxHash/tests/bench/LICENSE b/deps/xxHash/tests/bench/LICENSE new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/deps/xxHash/tests/bench/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/deps/xxHash/tests/bench/Makefile b/deps/xxHash/tests/bench/Makefile new file mode 100644 index 000000000..cdccfffd6 --- /dev/null +++ b/deps/xxHash/tests/bench/Makefile @@ -0,0 +1,67 @@ +# ################################################################ +# xxHash benchHash Makefile +# Copyright (C) 2019-2020 Yann Collet +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash +# ################################################################ +# benchHash: A generic benchmark for hash algorithms +# measuring throughput, latency and bandwidth +# ################################################################ + + +CPPFLAGS += -I../.. # directory of xxHash source files +CFLAGS ?= -O3 +CFLAGS += -std=c99 -Wall -Wextra -Wstrict-aliasing=1 +CFLAGS += $(MOREFLAGS) # custom way to add flags +CXXFLAGS ?= -O3 +LDFLAGS += $(MOREFLAGS) + + +OBJ_LIST = main.o bhDisplay.o benchHash.o benchfn.o timefn.o + + +default: benchHash + +all: benchHash + +benchHash32: CFLAGS += -m32 +benchHash32: CXXFLAGS += -m32 + +benchHash_avx2: CFLAGS += -mavx2 +benchHash_avx2: CXXFLAGS += -mavx2 + +benchHash_hw: CPPFLAGS += -DHARDWARE_SUPPORT +benchHash_hw: CFLAGS += -mavx2 -maes +benchHash_hw: CXXFLAGS += -mavx2 -mpclmul -std=c++14 + +benchHash benchHash32 benchHash_avx2 benchHash_nosimd benchHash_hw: $(OBJ_LIST) + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + + +main.o: bhDisplay.h hashes.h + +bhDisplay.o: bhDisplay.h benchHash.h + +benchHash.o: benchHash.h + + +clean: + $(RM) *.o benchHash benchHash32 benchHash_avx2 benchHash_hw diff --git a/deps/xxHash/tests/bench/benchHash.c b/deps/xxHash/tests/bench/benchHash.c new file mode 100644 index 000000000..05739c7b5 --- /dev/null +++ b/deps/xxHash/tests/bench/benchHash.c @@ -0,0 +1,164 @@ +/* +* Hash benchmark module +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +* +* You can contact the author at: +* - xxHash homepage: https://www.xxhash.com +* - xxHash source repository: https://github.com/Cyan4973/xxHash +*/ + +/* benchmark hash functions */ + +#include // malloc +#include + +#include "benchHash.h" + + +static void initBuffer(void* buffer, size_t size) +{ + const unsigned long long k1 = 11400714785074694791ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ + const unsigned long long k2 = 14029467366897019727ULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ + unsigned long long acc = k2; + unsigned char* const p = (unsigned char*)buffer; + for (size_t s = 0; s < size; s++) { + acc *= k1; + p[s] = (unsigned char)(acc >> 56); + } +} + + +#define MARGIN_FOR_LATENCY 1024 +#define START_MASK (MARGIN_FOR_LATENCY-1) + +typedef size_t (*sizeFunction_f)(size_t targetSize); + +/* + * bench_hash_internal(): + * Benchmarks hashfn repeateadly over single input of size `size` + * return: nb of hashes per second + */ +static double +bench_hash_internal(BMK_benchFn_t hashfn, void* payload, + size_t nbBlocks, sizeFunction_f selectSize, size_t size, + unsigned total_time_ms, unsigned iter_time_ms) +{ + BMK_timedFnState_shell shell; + BMK_timedFnState_t* const txf = BMK_initStatic_timedFnState(&shell, sizeof(shell), total_time_ms, iter_time_ms); + assert(txf != NULL); + + size_t const srcSize = (size_t)size; + size_t const srcBufferSize = srcSize + MARGIN_FOR_LATENCY; + void* const srcBuffer = malloc(srcBufferSize); + assert(srcBuffer != NULL); + initBuffer(srcBuffer, srcBufferSize); + #define FAKE_DSTSIZE 32 + size_t const dstSize = FAKE_DSTSIZE; + char dstBuffer_static[FAKE_DSTSIZE] = {0}; + + #define NB_BLOCKS_MAX 1024 + const void* srcBuffers[NB_BLOCKS_MAX]; + size_t srcSizes[NB_BLOCKS_MAX]; + void* dstBuffers[NB_BLOCKS_MAX]; + size_t dstCapacities[NB_BLOCKS_MAX]; + assert(nbBlocks < NB_BLOCKS_MAX); + + assert(size > 0); + for (size_t n=0; n < nbBlocks; n++) { + srcBuffers[n] = srcBuffer; + srcSizes[n] = selectSize(size); + dstBuffers[n] = dstBuffer_static; + dstCapacities[n] = dstSize; + } + + + BMK_benchParams_t params = { + .benchFn = hashfn, + .benchPayload = payload, + .initFn = NULL, + .initPayload = NULL, + .errorFn = NULL, + .blockCount = nbBlocks, + .srcBuffers = srcBuffers, + .srcSizes = srcSizes, + .dstBuffers = dstBuffers, + .dstCapacities = dstCapacities, + .blockResults = NULL + }; + BMK_runOutcome_t result; + + while (!BMK_isCompleted_TimedFn(txf)) { + result = BMK_benchTimedFn(txf, params); + assert(BMK_isSuccessful_runOutcome(result)); + } + + BMK_runTime_t const runTime = BMK_extract_runTime(result); + + free(srcBuffer); + assert(runTime.nanoSecPerRun != 0); + return (1000000000U / runTime.nanoSecPerRun) * nbBlocks; + +} + + +static size_t rand_1_N(size_t N) { return ((size_t)rand() % N) + 1; } + +static size_t identity(size_t s) { return s; } + +static size_t +benchLatency(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* customPayload) +{ + (void)dst; (void)dstCapacity; + BMK_benchFn_t benchfn = (BMK_benchFn_t)customPayload; + static size_t hash = 0; + + const void* const start = (const char*)src + (hash & START_MASK); + + return hash = benchfn(start, srcSize, dst, dstCapacity, NULL); +} + + + +#ifndef SIZE_TO_HASH_PER_ROUND +# define SIZE_TO_HASH_PER_ROUND 200000 +#endif + +#ifndef NB_HASH_ROUNDS_MAX +# define NB_HASH_ROUNDS_MAX 1000 +#endif + +double bench_hash(BMK_benchFn_t hashfn, + BMK_benchMode benchMode, + size_t size, BMK_sizeMode sizeMode, + unsigned total_time_ms, unsigned iter_time_ms) +{ + sizeFunction_f const sizef = (sizeMode == BMK_fixedSize) ? identity : rand_1_N; + BMK_benchFn_t const benchfn = (benchMode == BMK_throughput) ? hashfn : benchLatency; + BMK_benchFn_t const payload = (benchMode == BMK_throughput) ? NULL : hashfn; + + size_t nbBlocks = (SIZE_TO_HASH_PER_ROUND / size) + 1; + if (nbBlocks > NB_HASH_ROUNDS_MAX) nbBlocks = NB_HASH_ROUNDS_MAX; + + return bench_hash_internal(benchfn, payload, + nbBlocks, sizef, size, + total_time_ms, iter_time_ms); +} diff --git a/deps/xxHash/tests/bench/benchHash.h b/deps/xxHash/tests/bench/benchHash.h new file mode 100644 index 000000000..6c9ba9104 --- /dev/null +++ b/deps/xxHash/tests/bench/benchHash.h @@ -0,0 +1,67 @@ +/* +* Hash benchmark module +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +* +* You can contact the author at: +* - xxHash homepage: https://www.xxhash.com +* - xxHash source repository: https://github.com/Cyan4973/xxHash +*/ + + +#ifndef BENCH_HASH_H_983426678 +#define BENCH_HASH_H_983426678 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* === Dependencies === */ + +#include "benchfn.h" /* BMK_benchFn_t */ + + +/* === Declarations === */ + +typedef enum { BMK_throughput, BMK_latency } BMK_benchMode; + +typedef enum { BMK_fixedSize, /* hash always `size` bytes */ + BMK_randomSize, /* hash a random nb of bytes, between 1 and `size` (inclusive) */ +} BMK_sizeMode; + +/* + * bench_hash(): + * Returns speed expressed as nb hashes per second. + * total_time_ms: time spent benchmarking the hash function with given parameters + * iter_time_ms: time spent for one round. If multiple rounds are run, + * bench_hash() will report the speed of best round. + */ +double bench_hash(BMK_benchFn_t hashfn, + BMK_benchMode benchMode, + size_t size, BMK_sizeMode sizeMode, + unsigned total_time_ms, unsigned iter_time_ms); + + + +#if defined (__cplusplus) +} +#endif + +#endif /* BENCH_HASH_H_983426678 */ diff --git a/deps/xxHash/tests/bench/benchfn.c b/deps/xxHash/tests/bench/benchfn.c new file mode 100644 index 000000000..ec7e9a25d --- /dev/null +++ b/deps/xxHash/tests/bench/benchfn.c @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Includes +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#undef NDEBUG /* assert must not be disabled */ +#include /* assert */ + +#include "timefn.h" /* UTIL_time_t, UTIL_getTime */ +#include "benchfn.h" + + +/* ************************************* +* Constants +***************************************/ +#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + + +/* ************************************* +* Debug errors +***************************************/ +#if defined(DEBUG) && (DEBUG >= 1) +# include /* fprintf */ +# define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } +#else +# define DEBUGOUTPUT(...) +#endif + + +/* error without displaying */ +#define RETURN_QUIET_ERROR(retValue, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error : "); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + return retValue; \ +} + + +/* ************************************* +* Benchmarking an arbitrary function +***************************************/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.error_tag_never_ever_use_directly == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! + * check outcome validity first, using BMK_isValid_runResult() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.error_tag_never_ever_use_directly == 0); + return outcome.internal_never_ever_use_directly; +} + +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) +{ + assert(outcome.error_tag_never_ever_use_directly != 0); + return outcome.error_result_never_ever_use_directly; +} + +static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.error_tag_never_ever_use_directly = 1; + b.error_result_never_ever_use_directly = errorResult; + return b; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.error_tag_never_ever_use_directly = 0; + outcome.internal_never_ever_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return a size_t value that errorFn can interpret */ +/* takes # of blocks and list of size & stuff for each. */ +/* can report result of benchFn for each block into blockResult. */ +/* blockResult is optional, provide NULL if this information is not required */ +/* note : time per loop can be reported as zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, + unsigned nbLoops) +{ + /* init */ + { size_t i; + for (i = 0; i < p.blockCount; i++) { + memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ + } } + + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); + size_t dstSize = 0; + unsigned loopNb, blockNb; + nbLoops += !nbLoops; /* minimum nbLoops is 1 */ + if (p.initFn != NULL) p.initFn(p.initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < p.blockCount; blockNb++) { + size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], + p.dstBuffers[blockNb], p.dstCapacities[blockNb], + p.benchPayload); + if (loopNb == 0) { + if (p.blockResults != NULL) p.blockResults[blockNb] = res; + if ((p.errorFn != NULL) && (p.errorFn(res))) { + RETURN_QUIET_ERROR(BMK_runOutcome_error(res), + "Function benchmark failed on block %u (of size %u) with error %i", + blockNb, (unsigned)p.srcSizes[blockNb], (int)res); + } + dstSize += res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ + + { PTime const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = (double)totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } +} + + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + PTime timeSpent_ns; + PTime timeBudget_ns; + PTime runBudget_ns; + BMK_runTime_t fastestRun; + unsigned nbLoops; + UTIL_time_t coolTime; +}; /* typedef'd to BMK_timedFnState_t within bench.h */ + +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } + +BMK_timedFnState_t* +BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) +{ + typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ + typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ + size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; + if (buffer == NULL) return NULL; + if (size < sizeof(struct BMK_timedFnState_s)) return NULL; + if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if nb of seconds set in timedFnState for all runs is spent. + * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, + BMK_benchParams_t p) +{ + PTime const runBudget_ns = cont->runBudget_ns; + PTime const runTimeMin_ns = runBudget_ns / 2; + BMK_runTime_t bestRunTime = cont->fastestRun; + + for (;;) { + BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); + + if (!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ + return runResult; + } + + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += (unsigned long long)loopDuration_ns; + + /* estimate nbLoops for next run to last approximately 1 second */ + if (loopDuration_ns > (runBudget_ns / 50)) { + double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; + } else { + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + + if (loopDuration_ns < runTimeMin_ns) { + /* When benchmark run time is too small : don't report results. + * increased risks of rounding errors */ + continue; + } + + if (newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + } + break; + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); +} diff --git a/deps/xxHash/tests/bench/benchfn.h b/deps/xxHash/tests/bench/benchfn.h new file mode 100644 index 000000000..42d103383 --- /dev/null +++ b/deps/xxHash/tests/bench/benchfn.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* benchfn : + * benchmark any function on a set of input + * providing result in nanoSecPerRun + * or detecting and returning an error + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_FN_H_23876 +#define BENCH_FN_H_23876 + +/* === Dependencies === */ +#include /* size_t */ + + +/* ==== Benchmark any function, iterated on a set of blocks ==== */ + +/* BMK_runTime_t: valid result return type */ + +typedef struct { + double nanoSecPerRun; /* time per iteration (over all blocks) */ + size_t sumOfReturn; /* sum of return values */ +} BMK_runTime_t; + + +/* BMK_runOutcome_t: + * type expressing the outcome of a benchmark run by BMK_benchFunction(), + * which can be either valid or invalid. + * benchmark outcome can be invalid if errorFn is provided. + * BMK_runOutcome_t must be considered "opaque" : never access its members directly. + * Instead, use its assigned methods : + * BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult. + * The structure is only described here to allow its allocation on stack. */ + +typedef struct { + BMK_runTime_t internal_never_ever_use_directly; + size_t error_result_never_ever_use_directly; + int error_tag_never_ever_use_directly; +} BMK_runOutcome_t; + + +/* prototypes for benchmarked functions */ +typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); +typedef size_t (*BMK_initFn_t)(void* initPayload); +typedef unsigned (*BMK_errorFn_t)(size_t); + + +/* BMK_benchFunction() parameters are provided via the following structure. + * A structure is preferable for readability, + * as the number of parameters required is fairly large. + * No initializer is provided, because it doesn't make sense to provide some "default" : + * all parameters must be specified by the caller. + * optional parameters are labelled explicitly, and accept value NULL when not used */ +typedef struct { + BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */ + void* benchPayload; /* pass custom parameters to benchFn : + * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */ + BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */ + void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */ + BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not. + * errorFn can be NULL, in which case no check is performed. + * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error. + * Execution is stopped as soon as an error is detected. + * the triggering return value can be retrieved using BMK_extract_errorResult(). */ + size_t blockCount; /* number of blocks to operate benchFn on. + * It's also the size of all array parameters : + * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */ + const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */ + const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */ + void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */ + const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */ + size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */ +} BMK_benchParams_t; + + +/* BMK_benchFunction() : + * This function benchmarks benchFn and initFn, providing a result. + * + * params : see description of BMK_benchParams_t above. + * nbLoops: defines number of times benchFn is run over the full set of blocks. + * Minimum value is 1. A 0 is interpreted as a 1. + * + * @return: can express either an error or a successful result. + * Use BMK_isSuccessful_runOutcome() to check if benchmark was successful. + * If yes, extract the result with BMK_extract_runTime(), + * it will contain : + * .sumOfReturn : the sum of all return values of benchFn through all of blocks + * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) + * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, + * in which case, this value will be the total amount of bytes written into dstBuffer. + * + * blockResults : when provided (!= NULL), and when benchmark is successful, + * params.blockResults contains all return values of `benchFn` over all blocks. + * when provided (!= NULL), and when benchmark failed, + * params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block. + */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops); + + + +/* check first if the benchmark was successful or not */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); + +/* If the benchmark was successful, extract the result. + * note : this function will abort() program execution if benchmark failed ! + * always check if benchmark was successful first ! + */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); + +/* when benchmark failed, it means one invocation of `benchFn` failed. + * The failure was detected by `errorFn`, operating on return values of `benchFn`. + * Returns the faulty return value. + * note : this function will abort() program execution if benchmark did not failed. + * always check if benchmark failed first ! + */ +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome); + + + +/* ==== Benchmark any function, returning intermediate results ==== */ + +/* state information tracking benchmark session */ +typedef struct BMK_timedFnState_s BMK_timedFnState_t; + +/* BMK_benchTimedFn() : + * Similar to BMK_benchFunction(), most arguments being identical. + * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. + * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() + */ +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState, + BMK_benchParams_t params); + +/* Tells if duration of all benchmark runs has exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. + */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); +void BMK_freeTimedFnState(BMK_timedFnState_t* state); + + +/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() : + * Makes it possible to statically allocate a BMK_timedFnState_t on stack. + * BMK_timedFnState_shell is only there to allocate space, + * never ever access its members. + * BMK_timedFnState_t() actually accepts any buffer. + * It will check if provided buffer is large enough and is correctly aligned, + * and will return NULL if conditions are not respected. + */ +#define BMK_TIMEDFNSTATE_SIZE 64 +typedef union { + char never_access_space[BMK_TIMEDFNSTATE_SIZE]; + long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */ +} BMK_timedFnState_shell; +BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms); + + +#endif /* BENCH_FN_H_23876 */ + +#if defined (__cplusplus) +} +#endif diff --git a/deps/xxHash/tests/bench/bhDisplay.c b/deps/xxHash/tests/bench/bhDisplay.c new file mode 100644 index 000000000..6cf1a5375 --- /dev/null +++ b/deps/xxHash/tests/bench/bhDisplay.c @@ -0,0 +1,159 @@ +/* +* CSV Display module for the hash benchmark program +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +* +* You can contact the author at : +* - xxHash homepage : https://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* === Dependencies === */ + +#include /* rand */ +#include /* printf */ +#include + +#include "benchHash.h" +#include "bhDisplay.h" + + +/* === benchmark large input === */ + +#define MB_UNIT 1000000 +#define BENCH_LARGE_ITER_MS 490 +#define BENCH_LARGE_TOTAL_MS 1010 +static void bench_oneHash_largeInput(Bench_Entry hashDesc, int minlog, int maxlog) +{ + printf("%-7s", hashDesc.name); + for (int sizelog=minlog; sizelog<=maxlog; sizelog++) { + size_t const inputSize = (size_t)1 << sizelog; + double const nbhps = bench_hash(hashDesc.hash, BMK_throughput, + inputSize, BMK_fixedSize, + BENCH_LARGE_TOTAL_MS, BENCH_LARGE_ITER_MS); + printf(",%6.0f", nbhps * inputSize / MB_UNIT); fflush(NULL); + } + printf("\n"); +} + +void bench_largeInput(Bench_Entry const* hashDescTable, int nbHashes, int minlog, int maxlog) +{ + assert(maxlog < 31); + assert(minlog >= 0); + printf("benchmarking large inputs : from %u bytes (log%i) to %u MB (log%i) \n", + 1U << minlog, minlog, + (1U << maxlog) >> 20, maxlog); + for (int i=0; i /* size_t */ + + +/* ================================================== + * Non-portable hash algorithms + * =============================================== */ + + +#ifdef HARDWARE_SUPPORT + +/* + * List any hash algorithms that depend on specific hardware support, + * including for example: + * - Hardware crc32c + * - Hardware AES support + * - Carryless Multipliers (clmul) + * - AVX2 + */ + +#endif + + + +/* ================================================== + * List of hashes + * ================================================== + * Each hash must be wrapped in a thin redirector conformant with the BMK_benchfn_t. + * BMK_benchfn_t is generic, not specifically designed for hashes. + * For hashes, the following parameters are expected to be useless: + * dst, dstCapacity, customPayload. + * + * The result of each hash is assumed to be provided as function return value. + * This condition is important for latency measurements. + */ + + /* === xxHash === */ +#define XXH_INLINE_ALL +#include "xxhash.h" + +size_t XXH32_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH32(src, srcSize, 0); +} + + +size_t XXH64_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH64(src, srcSize, 0); +} + + +size_t xxh3_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH3_64bits(src, srcSize); +} + + +size_t XXH128_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH3_128bits(src, srcSize).low64; +} + + + +/* ================================================== + * Table of hashes + * =============================================== */ + +#include "bhDisplay.h" /* Bench_Entry */ + +#ifndef HARDWARE_SUPPORT +# define NB_HASHES 4 +#else +# define NB_HASHES 4 +#endif + +Bench_Entry const hashCandidates[NB_HASHES] = { + { "xxh3" , xxh3_wrapper }, + { "XXH32" , XXH32_wrapper }, + { "XXH64" , XXH64_wrapper }, + { "XXH128", XXH128_wrapper }, +#ifdef HARDWARE_SUPPORT + /* list here codecs which require specific hardware support, such SSE4.1, PCLMUL, AVX2, etc. */ +#endif +}; diff --git a/deps/xxHash/tests/bench/main.c b/deps/xxHash/tests/bench/main.c new file mode 100644 index 000000000..85c4364b8 --- /dev/null +++ b/deps/xxHash/tests/bench/main.c @@ -0,0 +1,220 @@ +/* + * Main program to benchmark hash functions + * Part of the xxHash project + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +/* === dependencies === */ + +#include /* printf */ +#include /* INT_MAX */ +#include "bhDisplay.h" /* bench_x */ + + +/* === defines list of hashes `hashCandidates` and NB_HASHES *** */ + +#include "hashes.h" + + +/* === parse command line === */ + +#undef NDEBUG +#include + + +/*! + * readIntFromChar(): + * Allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + */ +static int readIntFromChar(const char** stringPtr) +{ + static int const max = (INT_MAX / 10) - 1; + int result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + assert(result < max); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + int const maxK = INT_MAX >> 10; + assert(result < maxK); + result <<= 10; + if (**stringPtr=='M') { + assert(result < maxK); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + + +/** + * longCommand(): + * Checks if string is the same as longCommand. + * If yes, @return 1, otherwise @return 0 + */ +static int isCommand(const char* string, const char* longCommand) +{ + assert(string); + assert(longCommand); + size_t const comSize = strlen(longCommand); + return !strncmp(string, longCommand, comSize); +} + +/* + * longCommandWArg(): + * Checks if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately + * follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + assert(stringPtr); + assert(longCommand); + size_t const comSize = strlen(longCommand); + int const result = isCommand(*stringPtr, longCommand); + if (result) *stringPtr += comSize; + return result; +} + + +/* === default values - can be redefined at compilation time === */ + +#ifndef SMALL_SIZE_MIN_DEFAULT +# define SMALL_SIZE_MIN_DEFAULT 1 +#endif +#ifndef SMALL_SIZE_MAX_DEFAULT +# define SMALL_SIZE_MAX_DEFAULT 127 +#endif +#ifndef LARGE_SIZELOG_MIN_DEFAULT +# define LARGE_SIZELOG_MIN_DEFAULT 9 +#endif +#ifndef LARGE_SIZELOG_MAX_DEFAULT +# define LARGE_SIZELOG_MAX_DEFAULT 27 +#endif + + +static int display_hash_names(void) +{ + int i; + printf("available hashes : \n"); + for (i=0; i= 1); return help(exename); } + if (isCommand(*arg, "--list")) { return display_hash_names(); } + if (longCommandWArg(arg, "--n=")) { nb_h_test = readIntFromChar(arg); continue; } /* hidden command */ + if (longCommandWArg(arg, "--minl=")) { largeTest_log_min = readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--maxl=")) { largeTest_log_max = readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--mins=")) { smallTest_size_min = (size_t)readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--maxs=")) { smallTest_size_max = (size_t)readIntFromChar(arg); continue; } + /* not a command: must be a hash name */ + hashNb = hashID(*arg); + if (hashNb >= 0) { + nb_h_test = 1; + } else { + /* not a hash name: error */ + return badusage(exename); + } + } + + /* border case (requires (mis)using hidden command `--n=#`) */ + if (hashNb + nb_h_test > NB_HASHES) { + printf("wrong hash selection \n"); + return 1; + } + + printf(" === benchmarking %i hash functions === \n", nb_h_test); + if (largeTest_log_max >= largeTest_log_min) { + bench_largeInput(hashCandidates+hashNb, nb_h_test, largeTest_log_min, largeTest_log_max); + } + if (smallTest_size_max >= smallTest_size_min) { + bench_throughput_smallInputs(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_throughput_randomInputLength(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_latency_smallInputs(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_latency_randomInputLength(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + } + + return 0; +} diff --git a/deps/xxHash/tests/bench/timefn.c b/deps/xxHash/tests/bench/timefn.c new file mode 100644 index 000000000..856812898 --- /dev/null +++ b/deps/xxHash/tests/bench/timefn.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2019-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* === Dependencies === */ + +#include "timefn.h" + + +/*-**************************************** +* Time functions +******************************************/ + +#if defined(_WIN32) /* Windows */ + +#include /* abort */ +#include /* perror */ + +UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + + + +#elif defined(__APPLE__) && defined(__MACH__) + +UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom); +} + + + +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance */ + +#include /* abort */ +#include /* perror */ + +UTIL_time_t UTIL_getTime(void) +{ + /* time must be initialized, othersize it may fail msan test. + * No good reason, likely a limitation of timespec_get() for some target */ + UTIL_time_t time = UTIL_TIME_INITIALIZER; + if (timespec_get(&time, TIME_UTC) != TIME_UTC) { + perror("timefn::timespec_get"); + abort(); + } + return time; +} + +static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; +} + +PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; +} + + + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + +UTIL_time_t UTIL_getTime(void) { return clock(); } +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + +#endif + + + +/* returns time span in microseconds */ +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + +/* returns time span in microseconds */ +PTime UTIL_clockSpanNano(UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeNano(clockStart, clockEnd); +} + +void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} diff --git a/deps/xxHash/tests/bench/timefn.h b/deps/xxHash/tests/bench/timefn.h new file mode 100644 index 000000000..41007f30c --- /dev/null +++ b/deps/xxHash/tests/bench/timefn.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef TIME_FN_H_MODULE_287987 +#define TIME_FN_H_MODULE_287987 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-**************************************** +* Dependencies +******************************************/ +#include /* utime */ +#if defined(_MSC_VER) +# include /* utime */ +#else +# include /* utime */ +#endif +#include /* clock_t, clock, CLOCKS_PER_SEC */ + + + +/*-**************************************** +* Local Types +******************************************/ + +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t PTime; /* Precise Time */ +#else + typedef unsigned long long PTime; /* does not support compilers without long long support */ +#endif + + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + + #include /* LARGE_INTEGER */ + typedef LARGE_INTEGER UTIL_time_t; + #define UTIL_TIME_INITIALIZER { { 0, 0 } } + +#elif defined(__APPLE__) && defined(__MACH__) + + #include + typedef PTime UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance */ + + typedef struct timespec UTIL_time_t; + #define UTIL_TIME_INITIALIZER { 0, 0 } + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + + typedef clock_t UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +#endif + + +UTIL_time_t UTIL_getTime(void); +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd); +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd); + +#define SEC_TO_MICRO ((PTime)1000000) +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart); +PTime UTIL_clockSpanNano(UTIL_time_t clockStart); + +void UTIL_waitForNextTick(void); + + +#if defined (__cplusplus) +} +#endif + +#endif /* TIME_FN_H_MODULE_287987 */ diff --git a/deps/xxHash/tests/collisions/.gitignore b/deps/xxHash/tests/collisions/.gitignore new file mode 100644 index 000000000..f85592639 --- /dev/null +++ b/deps/xxHash/tests/collisions/.gitignore @@ -0,0 +1,2 @@ +#build artefacts +collisionsTest diff --git a/deps/xxHash/tests/collisions/LICENSE b/deps/xxHash/tests/collisions/LICENSE new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/deps/xxHash/tests/collisions/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/deps/xxHash/tests/collisions/Makefile b/deps/xxHash/tests/collisions/Makefile new file mode 100644 index 000000000..bad9835b0 --- /dev/null +++ b/deps/xxHash/tests/collisions/Makefile @@ -0,0 +1,74 @@ +# Brute force collision tester for 64-bit hashes +# Part of xxHash project +# Copyright (C) 2019-2020 Yann Collet +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash +# + +SRC_DIRS = ./ ../../ allcodecs/ +VPATH = $(SRC_DIRS) +CPPFLAGS += $(addprefix -I ,$(SRC_DIRS)) +CFLAGS ?= -std=c99 \ + -Wall -Wextra -Wconversion +CXXFLAGS ?= -Wall -Wextra -Wconversion -std=c++11 +LDFLAGS += -pthread +TESTHASHES = 110000000 + +HASH_SRC := $(sort $(wildcard allcodecs/*.c allcodecs/*.cc)) +HASH_OBJ := $(patsubst %.c,%.o,$(HASH_SRC)) + + +.PHONY: default +default: release + +.PHONY: all +all: release + +collisionsTest: main.o pool.o threading.o sort.o $(HASH_OBJ) + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +main.o: hashes.h xxhash.h + +release: CXXFLAGS += -O3 +release: CFLAGS += -O3 +release: collisionsTest + +debug: CXXFLAGS += -g3 -O0 -DDEBUG +debug: CFLAGS += -g3 -O0 -DDEBUG +debug: collisionsTest + +.PHONY: check +check: test + +.PHONY: test +test: debug + @echo "" + @echo "## $(TESTHASHES) hashes with original and 0 threads" + @time ./collisionsTest --nbh=$(TESTHASHES) + @echo "" + @echo "## $(TESTHASHES) hashes with original and 4 threads" + @time ./collisionsTest --nbh=$(TESTHASHES) --threadlog=2 + @echo "" + +.PHONY: clean +clean: + $(RM) *.o allcodecs/*.o + $(RM) collisionsTest diff --git a/deps/xxHash/tests/collisions/README.md b/deps/xxHash/tests/collisions/README.md new file mode 100644 index 000000000..683b11518 --- /dev/null +++ b/deps/xxHash/tests/collisions/README.md @@ -0,0 +1,122 @@ + +__collisionsTest__ is a brute force hash analyzer +which will measure a 64-bit hash algorithm's collision rate +by generating billions of hashes, +and comparing the result to an "ideal" target. + +The test requires a very large amount of memory. +By default, it will generate 24 billion of 64-bit hashes, +requiring __192 GB of RAM__ for their storage. +The number of hashes can be modified using command `--nbh=`. +Be aware that testing the collision ratio of 64-bit hashes +requires a very large amount of hashes (several billion) for meaningful measurements. + +To reduce RAM usage, an optional filter can be requested, with `--filter`. +It reduces the nb of candidates to analyze, hence associated RAM budget. +Note that the filter itself requires a lot of RAM +(32 GB by default, can be modified using `--filterlog=`, +a too small filter will not be efficient, aim at ~2 bytes per hash), +and reading and writing into filter cost a significant CPU budget, +so this method is slower. +It also doesn't allow advanced analysis of partial bitfields, +since most hashes will be discarded and not stored. + +When using the filter, the RAM budget consists of the filter and a list of candidates, +which will be a fraction of the original hash list. +Using default settings (24 billion hashes, 32 GB filter), +the number of potential candidates should be reduced to less than 2 billion, +requiring ~14 GB for their storage. +Such a result also depends on hash algorithm's efficiency. +The number of effective candidates is likely to be lower, at ~ 1 billion, +but storage must allocate an upper bound. + +For the default test, the expected "optimal" collision rate for a 64-bit hash function is ~18 collisions. + +#### How to build +``` +make +``` + +Note: the code is a mix of C99 and C++14, +it's not compatible with a C90-only compiler. + +#### Build modifier + +- `SLAB5`: use alternative pattern generator, friendlier for weak hash algorithms +- `POOL_MT`: if `=0`, disable multi-threading code (enabled by default) + +#### How to integrate any hash in the tester + +The build script will compile files found in `./allcodecs`. +Put the source code here. +This also works if the hash is a single `*.h` file. + +The glue happens in `hashes.h`. +In this file, there are 2 sections: +- Adds the required `#include "header.h"`, and creates a wrapper +to respect the format expected by the function pointer. +- Adds the wrapper, along with the name and an indication of the output width, +to the table, at the end of `hashes.h` + +Build with `make`. Locate your new hash with `./collisionsTest -h`, +it should be listed. + + +#### Usage + +``` +usage: ./collisionsTest [hashName] [opt] + +list of hashNames: (...) + +Optional parameters: + --nbh=NB Select nb of hashes to generate (25769803776 by default) + --filter Enable the filter. Slower, but reduces memory usage for same nb of hashes. + --threadlog=NB Use 2^NB threads + --len=NB Select length of input (255 bytes by default) +``` + +#### Some advises on how to setup a collisions test + +Most tests are primarily driven by the amount of RAM available. +Here's a method to decide the size of the test. + +Presuming that RAM budget is not plentiful, for this example 32 GB, +the `--filter` mode is actually compulsory to measure anything meaningful. +Let's plan 50% of memory for the filter, that's 16 GB. +This will be good enough to filter about 10% less hashes than this size. +Let's round down to 14 G. + +By requesting 14G, the expectation is that the program will automatically +size the filter to 16 GB, and expect to store ~1G candidates, +leaving enough room to breeze for the system. + +The command line becomes: +``` +./collisionsTest --nbh=14G --filter NameOfHash +``` + +#### Examples: + +Here are a few results produced with this tester: + +| Algorithm | Input Len | Nb Hashes | Expected | Nb Collisions | Notes | +| --- | --- | --- | --- | --- | --- | +| __XXH3__ | 255 | 100 Gi | 312.5 | 326 | | +| __XXH64__ | 255 | 100 Gi | 312.5 | 294 | | +| __XXH128__ low64 | 512 | 100 Gi | 312.5 | 321 | | +| __XXH128__ high64| 512 | 100 Gi | 312.5 | 325 | | +| __XXH128__ | 255 | 100 Gi | 0.0 | 0 | a 128-bit hash is expected to generate 0 collisions | + +Test on small inputs: + +| Algorithm | Input Len | Nb Hashes | Expected | Nb Collisions | Notes | +| --- | --- | --- | --- | --- | --- | +| __XXH64__ | 8 | 100 Gi | 312.5 | __0__ | `XXH64` is bijective for `len==8` | +| __XXH3__ | 8 | 100 Gi | 312.5 | __0__ | `XXH3` is also bijective for `len==8` | +| __XXH3__ | 16 | 100 Gi | 312.5 | 332 | | +| __XXH3__ | 32 | 14 Gi | 6.1 | 3 | | +| __XXH128__ | 16 | 25 Gi | 0.0 | 0 | test range 9-16 | +| __XXH128__ | 32 | 25 Gi | 0.0 | 0 | test range 17-128 | +| __XXH128__ | 100 | 13 Gi | 0.0 | 0 | test range 17-128 | +| __XXH128__ | 200 | 13 Gi | 0.0 | 0 | test range 129-240 | diff --git a/deps/xxHash/tests/collisions/allcodecs/README.md b/deps/xxHash/tests/collisions/allcodecs/README.md new file mode 100644 index 000000000..d41fc2dbb --- /dev/null +++ b/deps/xxHash/tests/collisions/allcodecs/README.md @@ -0,0 +1 @@ +Put in this directory all hash algorithms to test diff --git a/deps/xxHash/tests/collisions/allcodecs/dummy.c b/deps/xxHash/tests/collisions/allcodecs/dummy.c new file mode 100644 index 000000000..547d5c736 --- /dev/null +++ b/deps/xxHash/tests/collisions/allcodecs/dummy.c @@ -0,0 +1,38 @@ +/* + * dummy.c, a fake hash algorithm, just to test integration capabilities. + * Part of the xxHash project + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +#include + +unsigned badsum32(const void* input, size_t len, unsigned seed) +{ + unsigned sum = seed; + const unsigned char* in8 = input; + size_t c; + for (c=0; c /* size_t */ + +unsigned badsum32(const void* input, size_t len, unsigned seed); + + +#if defined (__cplusplus) +} +#endif + +#endif /* DUMMY_H_987987 */ diff --git a/deps/xxHash/tests/collisions/hashes.h b/deps/xxHash/tests/collisions/hashes.h new file mode 100644 index 000000000..0b7223d9f --- /dev/null +++ b/deps/xxHash/tests/collisions/hashes.h @@ -0,0 +1,127 @@ +/* + * List of hashes for the brute force collision tester + * Part of xxHash project + * Copyright (C) 2019-2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#ifndef HASHES_H_1235465 +#define HASHES_H_1235465 + +#include /* size_t */ +#include /* uint64_t */ +#define XXH_INLINE_ALL /* XXH128_hash_t */ +#include "xxhash.h" + + +/* return type */ + +typedef union { + uint64_t h64; + XXH128_hash_t h128; +} UniHash; + +UniHash uniHash32(uint64_t v32) +{ UniHash unih; + unih.h64 = v32; + return unih; +} + +UniHash uniHash64(uint64_t v64) +{ UniHash unih; + unih.h64 = v64; + return unih; +} + +UniHash uniHash128(XXH128_hash_t v128) +{ UniHash unih; + unih.h128 = v128; + return unih; +} + + +/* === xxHash === */ + +UniHash XXH3_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_64bits(data, size) ); +} + +UniHash XXH128_wrapper (const void* data, size_t size) +{ + return uniHash128( XXH3_128bits(data, size) ); +} + +UniHash XXH128l_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_128bits(data, size).low64 ); +} + +UniHash XXH128h_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_128bits(data, size).high64 ); +} + +UniHash XXH64_wrapper (const void* data, size_t size) +{ + return uniHash64 ( XXH64(data, size, 0) ); +} + +UniHash XXH32_wrapper (const void* data, size_t size) +{ + return uniHash32( XXH32(data, size, 0) ); +} + +/* === Dummy integration example === */ + +#include "dummy.h" + +UniHash badsum32_wrapper (const void* data, size_t size) +{ + return uniHash32( badsum32(data, size, 0) ); +} + + + +/* === Table === */ + +typedef UniHash (*hashfn) (const void* data, size_t size); + +typedef struct { + const char* name; + hashfn fn; + int bits; +} hashDescription; + +#define HASH_FN_TOTAL 7 + +hashDescription hashfnTable[HASH_FN_TOTAL] = { + { "xxh3" , XXH3_wrapper, 64 }, + { "xxh64" , XXH64_wrapper, 64 }, + { "xxh128", XXH128_wrapper, 128 }, + { "xxh128l", XXH128l_wrapper, 64 }, + { "xxh128h", XXH128h_wrapper, 64 }, + { "xxh32" , XXH32_wrapper, 32 }, + { "badsum32",badsum32_wrapper, 32 }, +}; + +#endif /* HASHES_H_1235465 */ diff --git a/deps/xxHash/tests/collisions/main.c b/deps/xxHash/tests/collisions/main.c new file mode 100644 index 000000000..3cdf5b4e4 --- /dev/null +++ b/deps/xxHash/tests/collisions/main.c @@ -0,0 +1,1124 @@ +/* + * Brute force collision tester for 64-bit hashes + * Part of the xxHash project + * Copyright (C) 2019-2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * The collision tester will generate 24 billion hashes (by default), + * and count how many collisions were produced by the 64-bit hash algorithm. + * The optimal amount of collisions for 64-bit is ~18 collisions. + * A good hash should be close to this figure. + * + * This program requires a lot of memory: + * - Either store hash values directly => 192 GB + * - Or use a filter: + * - 32 GB (by default) for the filter itself + * - + ~14 GB for the list of hashes (depending on the filter's outcome) + * Due to these memory constraints, it requires a 64-bit system. + */ + + + /* === Dependencies === */ + +#include /* uint64_t */ +#include /* malloc, free, qsort, exit */ +#include /* memset */ +#include /* printf, fflush */ + +#undef NDEBUG /* ensure assert is _not_ disabled */ +#include + +#include "hashes.h" /* UniHash, hashfn, hashfnTable */ + +#include "sort.hh" /* sort64 */ + + + +typedef enum { ht32, ht64, ht128 } Htype_e; + +/* === Debug === */ + +#define EXIT(...) { printf(__VA_ARGS__); printf("\n"); exit(1); } + +static void hexRaw(const void* buffer, size_t size) +{ + const unsigned char* p = (const unsigned char*)buffer; + for (size_t i=0; i> 33; + h64 *= prime64_2; + h64 ^= h64 >> 29; + h64 *= prime64_3; + h64 ^= h64 >> 32; + return h64; +} + +static unsigned char randomByte(size_t n) +{ + uint64_t n64 = avalanche64(n+1); + n64 *= prime64_1; + return (unsigned char)(n64 >> 56); +} + +typedef enum { sf_slab5, sf_sparse } sf_genMode; + + +#ifdef SLAB5 + +/* + * Slab5 sample generation. + * This algorithm generates unique inputs flipping on average 16 bits per candidate. + * It is generally much more friendly for most hash algorithms, especially + * weaker ones, as it shuffles more the input. + * The algorithm also avoids overfitting the per4 or per8 ingestion patterns. + */ + +#define SLAB_SIZE 5 + +typedef struct { + void* buffer; + size_t size; + sf_genMode mode; + size_t prngSeed; + uint64_t hnb; +} sampleFactory; + +static void init_sampleFactory(sampleFactory* sf, uint64_t htotal) +{ + uint64_t const minNbSlabs = ((htotal-1) >> 32) + 1; + uint64_t const minSize = minNbSlabs * SLAB_SIZE; + if (sf->size < minSize) + EXIT("sample size must be >= %i bytes for this amount of hashes", + (int)minSize); + + unsigned char* const p = (unsigned char*)sf->buffer; + for (size_t n=0; n < sf->size; n++) + p[n] = randomByte(n); + sf->hnb = 0; +} + +static sampleFactory* +create_sampleFactory(size_t size, uint64_t htotal, uint64_t seed) +{ + sampleFactory* const sf = malloc(sizeof(sampleFactory)); + if (!sf) EXIT("not enough memory"); + void* const buffer = malloc(size); + if (!buffer) EXIT("not enough memory"); + sf->buffer = buffer; + sf->size = size; + sf->mode = sf_slab5; + sf->prngSeed = seed; + init_sampleFactory(sf, htotal); + return sf; +} + +static void free_sampleFactory(sampleFactory* sf) +{ + if (!sf) return; + free(sf->buffer); + free(sf); +} + +static inline void update_sampleFactory(sampleFactory* sf) +{ + size_t const nbSlabs = sf->size / SLAB_SIZE; + size_t const SlabNb = sf->hnb % nbSlabs; + sf->hnb++; + + char* const ptr = (char*)sf->buffer; + size_t const start = (SlabNb * SLAB_SIZE) + 1; + uint32_t val32; + memcpy(&val32, ptr+start, sizeof(val32)); + static const uint32_t prime32_5 = 374761393U; + val32 += prime32_5; + memcpy(ptr+start, &val32, sizeof(val32)); +} + +#else + +/* + * Sparse sample generation. + * This is the default pattern generator. + * It only flips one bit at a time (mostly). + * Low hamming distance scenario is more difficult for weak hash algorithms. + * Note that CRC is immune to this scenario, since they are specifically + * designed to detect low hamming distances. + * Prefer the Slab5 pattern generator for collisions on CRC algorithms. + */ + +#define SPARSE_LEVEL_MAX 15 + +/* Nb of combinations of m bits in a register of n bits */ +static double Cnm(int n, int m) +{ + assert(n > 0); + assert(m > 0); + assert(m <= m); + double acc = 1; + for (int i=0; i= SPARSE_LEVEL_MAX) return 0; + acc += (uint64_t)Cnm((int)srcBits, nbBitsSet); + } + return 1; +} + +typedef struct { + void* buffer; + size_t size; + sf_genMode mode; + /* sparse */ + size_t bitIdx[SPARSE_LEVEL_MAX]; + int level; + size_t maxBitIdx; + /* slab5 */ + size_t nbSlabs; + size_t current; + size_t prngSeed; +} sampleFactory; + +static void init_sampleFactory(sampleFactory* sf, uint64_t htotal) +{ + if (!enoughCombos(sf->size, htotal)) { + EXIT("sample size must be larger for this amount of hashes"); + } + + memset(sf->bitIdx, 0, sizeof(sf->bitIdx)); + sf->level = 0; + + unsigned char* const p = (unsigned char*)sf->buffer; + for (size_t n=0; nsize; n++) + p[n] = randomByte(sf->prngSeed + n); +} + +static sampleFactory* +create_sampleFactory(size_t size, uint64_t htotal, uint64_t seed) +{ + sampleFactory* const sf = malloc(sizeof(sampleFactory)); + if (!sf) EXIT("not enough memory"); + void* const buffer = malloc(size); + if (!buffer) EXIT("not enough memory"); + sf->buffer = buffer; + sf->size = size; + sf->mode = sf_sparse; + sf->maxBitIdx = size * 8; + sf->prngSeed = seed; + init_sampleFactory(sf, htotal); + return sf; +} + +static void free_sampleFactory(sampleFactory* sf) +{ + if (!sf) return; + free(sf->buffer); + free(sf); +} + +static void flipbit(void* buffer, uint64_t bitID) +{ + size_t const pos = bitID >> 3; + unsigned char const mask = (unsigned char)(1 << (bitID & 7)); + unsigned char* const p = (unsigned char*)buffer; + p[pos] ^= mask; +} + +static int updateBit(void* buffer, size_t* bitIdx, int level, size_t max) +{ + if (level==0) return 0; /* can't progress further */ + + flipbit(buffer, bitIdx[level]); /* erase previous bits */ + + if (bitIdx[level] < max-1) { /* simple case: go to next bit */ + bitIdx[level]++; + flipbit(buffer, bitIdx[level]); /* set new bit */ + return 1; + } + + /* reached last bit: need to update a bit from lower level */ + if (!updateBit(buffer, bitIdx, level-1, max-1)) return 0; + bitIdx[level] = bitIdx[level-1] + 1; + flipbit(buffer, bitIdx[level]); /* set new bit */ + return 1; +} + +static inline void update_sampleFactory(sampleFactory* sf) +{ + if (!updateBit(sf->buffer, sf->bitIdx, sf->level, sf->maxBitIdx)) { + /* no more room => move to next level */ + sf->level++; + assert(sf->level < SPARSE_LEVEL_MAX); + + /* set new bits */ + for (int i=1; i <= sf->level; i++) { + sf->bitIdx[i] = (size_t)(i-1); + flipbit(sf->buffer, sf->bitIdx[i]); + } + } +} + +#endif /* pattern generator selection */ + + +/* === Candidate Filter === */ + +typedef unsigned char Filter; + +Filter* create_Filter(int bflog) +{ + assert(bflog < 64 && bflog > 1); + size_t bfsize = (size_t)1 << bflog; + Filter* bf = malloc(bfsize); + assert(((void)"Filter creation failed", bf)); + memset(bf, 0, bfsize); + return bf; +} + +void free_Filter(Filter* bf) +{ + free(bf); +} + +#ifdef FILTER_1_PROBE + +/* + * Attach hash to a slot + * return: Nb of potential collision candidates detected + * 0: position not yet occupied + * 2: position previously occupied by a single candidate + * 1: position already occupied by multiple candidates + */ +inline int Filter_insert(Filter* bf, int bflog, uint64_t hash) +{ + int const slotNb = hash & 3; + int const shift = slotNb * 2 ; + + size_t const bfmask = ((size_t)1 << bflog) - 1; + size_t const pos = (hash >> 2) & bfmask; + + int const existingCandidates = ((((unsigned char*)bf)[pos]) >> shift) & 3; + + static const int addCandidates[4] = { 0, 2, 1, 1 }; + static const int nextValue[4] = { 1, 2, 3, 3 }; + + ((unsigned char*)bf)[pos] |= (unsigned char)(nextValue[existingCandidates] << shift); + return addCandidates[existingCandidates]; +} + +/* + * Check if provided 64-bit hash is a collision candidate + * Requires the slot to be occupied by at least 2 candidates. + * return >0 if hash is a collision candidate + * 0 otherwise (slot unoccupied, or only one candidate) + * note: unoccupied slots should not happen in this algorithm, + * since all hashes are supposed to have been inserted at least once. + */ +inline int Filter_check(const Filter* bf, int bflog, uint64_t hash) +{ + int const slotNb = hash & 3; + int const shift = slotNb * 2; + + size_t const bfmask = ((size_t)1 << bflog) - 1; + size_t const pos = (hash >> 2) & bfmask; + + return (((const unsigned char*)bf)[pos]) >> (shift+1) & 1; +} + +#else + +/* + * 2-probes strategy, + * more efficient at filtering candidates, + * requires filter size to be > nb of hashes + */ + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +/* + * Attach hash to 2 slots + * return: Nb of potential candidates detected + * 0: position not yet occupied + * 2: position previously occupied by a single candidate (at most) + * 1: position already occupied by multiple candidates + */ +static inline int Filter_insert(Filter* bf, int bflog, uint64_t hash) + { + hash = avalanche64(hash); + unsigned const slot1 = hash & 255; + hash >>= 8; + unsigned const slot2 = hash & 255; + hash >>= 8; + + size_t const fclmask = ((size_t)1 << (bflog-6)) - 1; + size_t const cacheLineNb = hash & fclmask; + + size_t const pos1 = (cacheLineNb << 6) + (slot1 >> 2); + unsigned const shift1 = (slot1 & 3) * 2; + unsigned const ex1 = (bf[pos1] >> shift1) & 3; + + size_t const pos2 = (cacheLineNb << 6) + (slot2 >> 2); + unsigned const shift2 = (slot2 & 3) * 2; + unsigned const ex2 = (bf[pos2] >> shift2) & 3; + + unsigned const existing = MIN(ex1, ex2); + + static const int addCandidates[4] = { 0, 2, 1, 1 }; + static const unsigned nextValue[4] = { 1, 2, 3, 3 }; + + bf[pos1] &= (Filter)(~(3 << shift1)); /* erase previous value */ + bf[pos1] |= (Filter)(MAX(ex1, nextValue[existing]) << shift1); + bf[pos2] |= (Filter)(MAX(ex2, nextValue[existing]) << shift2); + + return addCandidates[existing]; + } + + +/* + * Check if provided 64-bit hash is a collision candidate + * Requires the slot to be occupied by at least 2 candidates. + * return >0 if hash is a collision candidate + * 0 otherwise (slot unoccupied, or only one candidate) + * note: unoccupied slots should not happen in this algorithm, + * since all hashes are supposed to have been inserted at least once. + */ +static inline int Filter_check(const Filter* bf, int bflog, uint64_t hash) + { + hash = avalanche64(hash); + unsigned const slot1 = hash & 255; + hash >>= 8; + unsigned const slot2 = hash & 255; + hash >>= 8; + + size_t const fclmask = ((size_t)1 << (bflog-6)) - 1; + size_t const cacheLineNb = hash & fclmask; + + size_t const pos1 = (cacheLineNb << 6) + (slot1 >> 2); + unsigned const shift1 = (slot1 & 3) * 2; + unsigned const ex1 = (bf[pos1] >> shift1) & 3; + + size_t const pos2 = (cacheLineNb << 6) + (slot2 >> 2); + unsigned const shift2 = (slot2 & 3) * 2; + unsigned const ex2 = (bf[pos2] >> shift2) & 3; + + return (ex1 >= 2) && (ex2 >= 2); + } + +#endif // FILTER_1_PROBE + + +/* === Display === */ + +#include /* clock_t, clock, time_t, time, difftime */ + +void update_indicator(uint64_t v, uint64_t total) +{ + static clock_t start = 0; + if (start==0) start = clock(); + clock_t const updateRate = CLOCKS_PER_SEC / 2; + + clock_t const clockSpan = (clock_t)(clock() - start); + if (clockSpan > updateRate) { + start = clock(); + assert(v <= total); + assert(total > 0); + double share = ((double)v / (double)total) * 100; + printf("%6.2f%% (%llu) \r", share, (unsigned long long)v); + fflush(NULL); + } +} + +/* note: not thread safe */ +const char* displayDelay(double delay_s) +{ + static char delayString[50]; + memset(delayString, 0, sizeof(delayString)); + + int const mn = ((int)delay_s / 60) % 60; + int const h = (int)delay_s / 3600; + int const sec = (int)delay_s % 60; + + char* p = delayString; + if (h) sprintf(p, "%i h ", h); + if (mn || h) { + p = delayString + strlen(delayString); + sprintf(p, "%i mn ", mn); + } + p = delayString + strlen(delayString); + sprintf(p, "%is ", sec); + + return delayString; +} + + +/* === Math === */ + +static double power(uint64_t base, int p) +{ + double value = 1; + assert(p>=0); + for (int i=0; i>= 1) bitId++; + return bitId; +} + + +/* === Filter and search collisions === */ + +#undef NDEBUG /* ensure assert is not disabled */ +#include + +/* will recommend 24 billion samples for 64-bit hashes, + * expecting 18 collisions for a good 64-bit hash */ +#define NB_BITS_MAX 64 /* can't store nor analyze hash wider than 64-bits for the time being */ +uint64_t select_nbh(int nbBits) +{ + assert(nbBits > 0); + if (nbBits > NB_BITS_MAX) nbBits = NB_BITS_MAX; + double targetColls = (double)((128 + 17) - (nbBits * 2)); + uint64_t nbH = 24; + while (estimateNbCollisions(nbH, nbBits) < targetColls) nbH *= 2; + return nbH; +} + + +typedef struct { + uint64_t nbCollisions; +} searchCollisions_results; + +typedef struct { + uint64_t nbH; + uint64_t mask; + uint64_t maskSelector; + size_t sampleSize; + uint64_t prngSeed; + int filterLog; /* <0 = disable filter; 0 = auto-size; */ + int hashID; + int display; + int nbThreads; + searchCollisions_results* resultPtr; +} searchCollisions_parameters; + +#define DISPLAY(...) { if (display) printf(__VA_ARGS__); } + +static int isEqual(void* hTablePtr, size_t index1, size_t index2, Htype_e htype) +{ + if ((htype == ht64) || (htype == ht32)) { + uint64_t const h1 = ((const uint64_t*)hTablePtr)[index1]; + uint64_t const h2 = ((const uint64_t*)hTablePtr)[index2]; + return (h1 == h2); + } else { + assert(htype == ht128); + XXH128_hash_t const h1 = ((const XXH128_hash_t*)hTablePtr)[index1]; + XXH128_hash_t const h2 = ((const XXH128_hash_t*)hTablePtr)[index2]; + return XXH128_isEqual(h1, h2); + } +} + +static int isHighEqual(void* hTablePtr, size_t index1, size_t index2, Htype_e htype, int rShift) +{ + uint64_t h1, h2; + if ((htype == ht64) || (htype == ht32)) { + h1 = ((const uint64_t*)hTablePtr)[index1]; + h2 = ((const uint64_t*)hTablePtr)[index2]; + } else { + assert(htype == ht128); + h1 = ((const XXH128_hash_t*)hTablePtr)[index1].high64; + h2 = ((const XXH128_hash_t*)hTablePtr)[index2].high64; + assert(rShift >= 64); + rShift -= 64; + } + assert(0 <= rShift && rShift < 64); + return (h1 >> rShift) == (h2 >> rShift); +} + +/* assumption: (htype*)hTablePtr[index] is valid */ +static void addHashCandidate(void* hTablePtr, UniHash h, Htype_e htype, size_t index) +{ + if ((htype == ht64) || (htype == ht32)) { + ((uint64_t*)hTablePtr)[index] = h.h64; + } else { + assert(htype == ht128); + ((XXH128_hash_t*)hTablePtr)[index] = h.h128; + } +} + +static int getNbBits_fromHtype(Htype_e htype) { + switch(htype) { + case ht32: return 32; + case ht64: return 64; + case ht128:return 128; + default: EXIT("hash size not supported"); + } +} + +static Htype_e getHtype_fromHbits(int nbBits) { + switch(nbBits) { + case 32 : return ht32; + case 64 : return ht64; + case 128: return ht128; + default: EXIT("hash size not supported"); + } +} + +static size_t search_collisions( + searchCollisions_parameters param) +{ + uint64_t totalH = param.nbH; + const uint64_t hMask = param.mask; + const uint64_t hSelector = param.maskSelector; + int bflog = param.filterLog; + const int filter = (param.filterLog >= 0); + const size_t sampleSize = param.sampleSize; + const int hashID = param.hashID; + const Htype_e htype = getHtype_fromHbits(hashfnTable[hashID].bits); + const int display = param.display; + /* init */ + sampleFactory* const sf = create_sampleFactory(sampleSize, totalH, param.prngSeed); + if (!sf) EXIT("not enough memory"); + + //const char* const hname = hashfnTable[hashID].name; + hashfn const hfunction = hashfnTable[hashID].fn; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + if (bflog == 0) bflog = highestBitSet(totalH) + 1; /* auto-size filter */ + uint64_t const bfsize = (1ULL << bflog); + + + /* === filter hashes (optional) === */ + + Filter* bf = NULL; + uint64_t nbPresents = totalH; + + if (filter) { + time_t const filterTBegin = time(NULL); + DISPLAY(" Creating filter (%i GB) \n", (int)(bfsize >> 30)); + bf = create_Filter(bflog); + if (!bf) EXIT("not enough memory for filter"); + + + DISPLAY(" Generate %llu hashes from samples of %u bytes \n", + (unsigned long long)totalH, (unsigned)sampleSize); + nbPresents = 0; + + for (uint64_t n=0; n < totalH; n++) { + if (display && ((n&0xFFFFF) == 1) ) + update_indicator(n, totalH); + update_sampleFactory(sf); + + UniHash const h = hfunction(sf->buffer, sampleSize); + if ((h.h64 & hMask) != hSelector) continue; + + nbPresents += (uint64_t)Filter_insert(bf, bflog, h.h64); + } + + if (nbPresents==0) { + DISPLAY(" Analysis completed: No collision detected \n"); + if (param.resultPtr) param.resultPtr->nbCollisions = 0; + free_Filter(bf); + free_sampleFactory(sf); + return 0; + } + + { double const filterDelay = difftime(time(NULL), filterTBegin); + DISPLAY(" Generation and filter completed in %s, detected up to %llu candidates \n", + displayDelay(filterDelay), (unsigned long long) nbPresents); + } } + + + /* === store hash candidates: duplicates will be present here === */ + + time_t const storeTBegin = time(NULL); + size_t const hashByteSize = (htype == ht128) ? 16 : 8; + size_t const tableSize = (nbPresents+1) * hashByteSize; + assert(tableSize > nbPresents); /* check tableSize calculation overflow */ + DISPLAY(" Storing hash candidates (%i MB) \n", (int)(tableSize >> 20)); + + /* Generate and store hashes */ + void* const hashCandidates = malloc(tableSize); + if (!hashCandidates) EXIT("not enough memory to store candidates"); + init_sampleFactory(sf, totalH); + size_t nbCandidates = 0; + for (uint64_t n=0; n < totalH; n++) { + if (display && ((n&0xFFFFF) == 1) ) update_indicator(n, totalH); + update_sampleFactory(sf); + + UniHash const h = hfunction(sf->buffer, sampleSize); + if ((h.h64 & hMask) != hSelector) continue; + + if (filter) { + if (Filter_check(bf, bflog, h.h64)) { + assert(nbCandidates < nbPresents); + addHashCandidate(hashCandidates, h, htype, nbCandidates++); + } + } else { + assert(nbCandidates < nbPresents); + addHashCandidate(hashCandidates, h, htype, nbCandidates++); + } + } + if (nbCandidates < nbPresents) { + /* Try to mitigate gnuc_quicksort behavior, by reducing allocated memory, + * since gnuc_quicksort uses a lot of additional memory for mergesort */ + void* const checkPtr = realloc(hashCandidates, nbCandidates * hashByteSize); + assert(checkPtr != NULL); + assert(checkPtr == hashCandidates); /* simplification: since we are reducing the size, + * we hope to keep the same ptr position. + * Otherwise, hashCandidates must be mutable. */ + DISPLAY(" List of hashes reduced to %u MB from %u MB (saved %u MB) \n", + (unsigned)((nbCandidates * hashByteSize) >> 20), + (unsigned)(tableSize >> 20), + (unsigned)((tableSize - (nbCandidates * hashByteSize)) >> 20) ); + } + double const storeTDelay = difftime(time(NULL), storeTBegin); + DISPLAY(" Stored %llu hash candidates in %s \n", + (unsigned long long) nbCandidates, displayDelay(storeTDelay)); + free_Filter(bf); + free_sampleFactory(sf); + + + /* === step 3: look for duplicates === */ + time_t const sortTBegin = time(NULL); + DISPLAY(" Sorting candidates... "); + fflush(NULL); + if ((htype == ht64) || (htype == ht32)) { + /* + * Use C++'s std::sort, as it's faster than C stdlib's qsort, and + * doesn't suffer from gnuc_libsort's memory expansion + */ + sort64(hashCandidates, nbCandidates); + } else { + assert(htype == ht128); + sort128(hashCandidates, nbCandidates); /* sort with custom comparator */ + } + double const sortTDelay = difftime(time(NULL), sortTBegin); + DISPLAY(" Completed in %s \n", displayDelay(sortTDelay)); + + /* scan and count duplicates */ + time_t const countBegin = time(NULL); + DISPLAY(" Looking for duplicates: "); + fflush(NULL); + size_t collisions = 0; + for (size_t n=1; n nbCandidates * 100) /* within range for meaningfull collision analysis results */ + && (expectedCollisions > 18.0) ) { + int const rShift = hashBits - nbHBits; + size_t HBits_collisions = 0; + for (size_t n=1; n 2.0) DISPLAY("WARNING !!! ===> "); + DISPLAY(" high %i bits: %zu collision (%.1f expected): x%.2f \n", + nbHBits, HBits_collisions, expectedCollisions, collisionRatio); + if (collisionRatio > worstRatio) { + worstNbHBits = nbHBits; + worstRatio = collisionRatio; + } } } + DISPLAY("Worst collision ratio at %i high bits: x%.2f \n", + worstNbHBits, worstRatio); + } + double const countDelay = difftime(time(NULL), countBegin); + DISPLAY(" Completed in %s \n", displayDelay(countDelay)); + + /* clean and exit */ + free (hashCandidates); + +#if 0 /* debug */ + for (size_t n=0; nnbCollisions = collisions; + return collisions; +} + + + +#if defined(__MACH__) || defined(__linux__) +#include +static size_t getProcessMemUsage(int children) +{ + struct rusage stats; + if (getrusage(children ? RUSAGE_CHILDREN : RUSAGE_SELF, &stats) == 0) + return (size_t)stats.ru_maxrss; + return 0; +} +#else +static size_t getProcessMemUsage(int ignore) { return 0; } +#endif + +void time_collisions(searchCollisions_parameters param) +{ + uint64_t totalH = param.nbH; + int hashID = param.hashID; + int display = param.display; + + /* init */ + assert(0 <= hashID && hashID < HASH_FN_TOTAL); + //const char* const hname = hashfnTable[hashID].name; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + double const targetColls = estimateNbCollisions(totalH, hwidth); + + /* Start the timer to measure start/end of hashing + collision detection. */ + time_t const programTBegin = time(NULL); + + /* Generate hashes, and count collisions */ + size_t const collisions = search_collisions(param); + + /* display results */ + double const programTDelay = difftime(time(NULL), programTBegin); + size_t const programBytesSelf = getProcessMemUsage(0); + size_t const programBytesChildren = getProcessMemUsage(1); + DISPLAY("\n\n"); + DISPLAY("===> Found %llu collisions (x%.2f, %.1f expected) in %s\n", + (unsigned long long)collisions, + (double)collisions / targetColls, + targetColls, + displayDelay(programTDelay)); + if (programBytesSelf) + DISPLAY("===> MaxRSS(self) %zuMB, MaxRSS(children) %zuMB\n", + programBytesSelf>>20, + programBytesChildren>>20); + DISPLAY("------------------------------------------ \n"); +} + +// wrapper for pthread interface +void MT_searchCollisions(void* payload) +{ + search_collisions(*(searchCollisions_parameters*)payload); +} + +/* === Command Line === */ + +/*! + * readU64FromChar(): + * Allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to the position where it stopped reading. + */ +static uint64_t readU64FromChar(const char** stringPtr) +{ + static uint64_t const max = (((uint64_t)(-1)) / 10) - 1; + uint64_t result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + assert(result < max); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M') || (**stringPtr=='G')) { + uint64_t const maxK = ((uint64_t)(-1)) >> 10; + assert(result < maxK); + result <<= 10; + if ((**stringPtr=='M') || (**stringPtr=='G')) { + assert(result < maxK); + result <<= 10; + if (**stringPtr=='G') { + assert(result < maxK); + result <<= 10; + } + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + + +/** + * longCommandWArg(): + * Checks if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + assert(longCommand); assert(stringPtr); assert(*stringPtr); + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +#include "pool.h" + +/* + * As some hashes use different algorithms depending on input size, + * it can be necessary to test multiple input sizes + * to paint an accurate picture of collision performance + */ +#define SAMPLE_SIZE_DEFAULT 256 +#define HASHFN_ID_DEFAULT 0 + +void help(const char* exeName) +{ + printf("usage: %s [hashName] [opt] \n\n", exeName); + printf("list of hashNames:"); + printf("%s ", hashfnTable[0].name); + for (int i=1; i < HASH_FN_TOTAL; i++) { + printf(", %s ", hashfnTable[i].name); + } + printf(" \n"); + printf("Default hashName is %s\n", hashfnTable[HASHFN_ID_DEFAULT].name); + + printf(" \n"); + printf("Optional parameters: \n"); + printf(" --nbh=NB Select nb of hashes to generate (%llu by default) \n", (unsigned long long)select_nbh(64)); + printf(" --filter Activates the filter. Slower, but reduces memory usage for the same nb of hashes.\n"); + printf(" --threadlog=NB Use 2^NB threads.\n"); + printf(" --len=MB Set length of the input (%i bytes by default) \n", SAMPLE_SIZE_DEFAULT); +} + +int bad_argument(const char* exeName) +{ + printf("incorrect command: \n"); + help(exeName); + return 1; +} + + +int main(int argc, const char** argv) +{ + if (sizeof(size_t) < 8) return 1; // cannot work on systems without ability to allocate objects >= 4 GB + + assert(argc > 0); + const char* const exeName = argv[0]; + uint64_t totalH = 0; /* auto, based on nbBits */ + int bflog = 0; /* auto */ + int filter = 0; /* disabled */ + size_t sampleSize = SAMPLE_SIZE_DEFAULT; + int hashID = HASHFN_ID_DEFAULT; + int threadlog = 0; + uint64_t prngSeed = 0; + + int arg_nb; + for (arg_nb = 1; arg_nb < argc; arg_nb++) { + const char** arg = argv + arg_nb; + + if (!strcmp(*arg, "-h")) { help(exeName); return 0; } + if (longCommandWArg(arg, "-T")) { threadlog = (int)readU64FromChar(arg); continue; } + + if (!strcmp(*arg, "--filter")) { filter=1; continue; } + if (!strcmp(*arg, "--no-filter")) { filter=0; continue; } + + if (longCommandWArg(arg, "--seed")) { prngSeed = readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--nbh=")) { totalH = readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--filter=")) { filter=1; bflog = (int)readU64FromChar(arg); assert(bflog < 64); continue; } + if (longCommandWArg(arg, "--filterlog=")) { filter=1; bflog = (int)readU64FromChar(arg); assert(bflog < 64); continue; } + if (longCommandWArg(arg, "--size=")) { sampleSize = (size_t)readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--len=")) { sampleSize = (size_t)readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--threadlog=")) { threadlog = (int)readU64FromChar(arg); continue; } + + /* argument understood as hash name (must be correct) */ + int hnb; + for (hnb=0; hnb < HASH_FN_TOTAL; hnb++) { + if (!strcmp(*arg, hashfnTable[hnb].name)) { hashID = hnb; break; } + } + if (hnb == HASH_FN_TOTAL) return bad_argument(exeName); + } + + /* init */ + const char* const hname = hashfnTable[hashID].name; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + double const targetColls = estimateNbCollisions(totalH, hwidth); + if (bflog == 0) bflog = highestBitSet(totalH) + 1; /* auto-size filter */ + if (!filter) bflog = -1; // disable filter + + if (sizeof(size_t) < 8) + EXIT("This program has not been validated on architectures other than " + "64bit \n"); + + printf(" *** Collision tester for 64+ bit hashes *** \n\n"); + printf("Testing %s algorithm (%i-bit) \n", hname, hwidth); + printf("This program will allocate a lot of memory,\n"); + printf("generate %llu %i-bit hashes from samples of %u bytes, \n", + (unsigned long long)totalH, hwidth, (unsigned)sampleSize); + printf("and attempt to produce %.0f collisions. \n\n", targetColls); + + int const nbThreads = 1 << threadlog; + if (nbThreads <= 0) EXIT("Invalid --threadlog value."); + + if (nbThreads == 1) { + + searchCollisions_parameters params; + params.nbH = totalH; + params.mask = 0; + params.maskSelector = 0; + params.sampleSize = sampleSize; + params.filterLog = bflog; + params.hashID = hashID; + params.display = 1; + params.resultPtr = NULL; + params.prngSeed = prngSeed; + params.nbThreads = 1; + time_collisions(params); + + } else { /* nbThreads > 1 */ + + /* use multithreading */ + if (threadlog >= 30) EXIT("too many threads requested"); + if ((uint64_t)nbThreads > (totalH >> 16)) + EXIT("too many threads requested"); + if (bflog > 0 && threadlog > (bflog-10)) + EXIT("too many threads requested"); + printf("using %i threads ... \n", nbThreads); + + /* allocation */ + time_t const programTBegin = time(NULL); + POOL_ctx* const pt = POOL_create((size_t)nbThreads, 1); + if (!pt) EXIT("not enough memory for threads"); + searchCollisions_results* const MTresults = calloc (sizeof(searchCollisions_results), (size_t)nbThreads); + if (!MTresults) EXIT("not enough memory"); + searchCollisions_parameters* const MTparams = calloc (sizeof(searchCollisions_parameters), (size_t)nbThreads); + if (!MTparams) EXIT("not enough memory"); + + /* distribute jobs */ + for (int tnb=0; tnb Found %llu collisions (x%.2f, %.1f expected) in %s\n", + (unsigned long long)nbCollisions, + (double)nbCollisions / targetColls, + targetColls, + displayDelay(programTDelay)); + if (programBytesSelf) + printf("===> MaxRSS(self) %zuMB, MaxRSS(children) %zuMB\n", + programBytesSelf>>20, + programBytesChildren>>20); + printf("------------------------------------------ \n"); + + /* Clean up */ + free(MTparams); + free(MTresults); + } + + return 0; +} diff --git a/deps/xxHash/tests/collisions/pool.c b/deps/xxHash/tests/collisions/pool.c new file mode 100644 index 000000000..c0eaefd47 --- /dev/null +++ b/deps/xxHash/tests/collisions/pool.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +#include /* size_t */ +#include /* malloc, calloc, free */ +#include /* memcpy */ +#include + +#include "pool.h" + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* === Build Macro === */ + +#ifndef POOL_MT // can be defined on command line +# define POOL_MT 1 +#endif + + +/* === Implementation === */ + +#if POOL_MT + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. + */ +static void* POOL_thread(void* opaque) +{ + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) +{ + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)calloc(1, sizeof(POOL_ctx)); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)malloc(ctx->queueSize * sizeof(POOL_job)); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t)); + ctx->threadCapacity = 0; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. +*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + free(ctx->queue); + free(ctx->threads); + free(ctx); +} + + + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t)); + if (!threadPool) return 1; + /* replace existing thread pool */ + memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + free(ctx->threads); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* POOL_MT not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_ctx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + (void)numThreads; + (void)queueSize; + return &g_ctx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_ctx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_ctx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/deps/xxHash/tests/collisions/pool.h b/deps/xxHash/tests/collisions/pool.h new file mode 100644 index 000000000..7c5e867d2 --- /dev/null +++ b/deps/xxHash/tests/collisions/pool.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include /* size_t */ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/deps/xxHash/tests/collisions/sort.cc b/deps/xxHash/tests/collisions/sort.cc new file mode 100644 index 000000000..237a114fc --- /dev/null +++ b/deps/xxHash/tests/collisions/sort.cc @@ -0,0 +1,59 @@ +/* + * sort.cc - C++ sort functions + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * C++ sort functions tend to run faster than C ones due to templates allowing + * inline optimizations. + * Also, glibc's qsort() seems to inflate memory usage, resulting in OOM + * crashes on the test server. + */ + +#include // std::sort +#define XXH_INLINE_ALL // XXH128_cmp +#include + +#include "sort.hh" + +void sort64(uint64_t* table, size_t size) +{ + std::sort(table, table + size); +} + +#include // qsort + +void sort128(XXH128_hash_t* table, size_t size) +{ +#if 0 + // C++ sort using a custom function object + struct { + bool operator()(XXH128_hash_t a, XXH128_hash_t b) const + { + return XXH128_cmp(&a, &b); + } + } customLess; + std::sort(table, table + size, customLess); +#else + qsort(table, size, sizeof(*table), XXH128_cmp); +#endif +} diff --git a/deps/xxHash/tests/collisions/sort.hh b/deps/xxHash/tests/collisions/sort.hh new file mode 100644 index 000000000..278ce05c6 --- /dev/null +++ b/deps/xxHash/tests/collisions/sort.hh @@ -0,0 +1,40 @@ +/* + * sort.hh - headers for C++ sort functions + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at : + * - xxHash homepage : https://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include // size +#include // uint64_t +#define XXH_STATIC_LINKING_ONLY // XXH128_hash_t +#include "xxhash.h" + +void sort64(uint64_t* table, size_t size); + +void sort128(XXH128_hash_t* table, size_t size); + +#ifdef __cplusplus +} // extern C +#endif diff --git a/deps/xxHash/tests/collisions/threading.c b/deps/xxHash/tests/collisions/threading.c new file mode 100644 index 000000000..516466719 --- /dev/null +++ b/deps/xxHash/tests/collisions/threading.c @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + + + /* === Build Macro === */ + + #ifndef POOL_MT // can be defined on command line + # define POOL_MT 1 + #endif + + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useles_symbol; + +#if POOL_MT && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper + */ + + +/* === Dependencies === */ +#include +#include +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return (int)GetLastError(); + } +} + +#endif /* POOL_MT */ diff --git a/deps/xxHash/tests/collisions/threading.h b/deps/xxHash/tests/collisions/threading.h new file mode 100644 index 000000000..700bf4426 --- /dev/null +++ b/deps/xxHash/tests/collisions/threading.h @@ -0,0 +1,124 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* === Build Macro === */ + +#ifndef POOL_MT // can be defined on command line +# define POOL_MT 1 +#endif + + +/* === Implementation === */ + +#if POOL_MT && defined(_WIN32) + +/** + * Define windows version before include + */ +#undef WINVER +#define WINVER 0x0600 + +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include +#include + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif POOL_MT /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* POOL_MT == 0 */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* POOL_MT */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/deps/xxHash/tests/generate_unicode_test.c b/deps/xxHash/tests/generate_unicode_test.c new file mode 100644 index 000000000..eed6ac01a --- /dev/null +++ b/deps/xxHash/tests/generate_unicode_test.c @@ -0,0 +1,154 @@ +/* + * Generates a Unicode test for xxhsum without using Unicode in the source files. + * + * Copyright (C) 2020 Devin Hussey (easyaspi314) + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Certain terminals don't properly handle UTF-8 (i.e. rxvt and command prompt + * in the default codepage), and that can cause issues when editing text. + * + * We use this C file to generate a file with a Unicode filename, a file with + * a checksum of said file, and both a Windows batch script and a Unix shell + * script to test the file. + */ + +#define _CRT_SECURE_NO_WARNINGS /* Silence warnings on MSVC */ +#include + +/* Use a Japanese filename, something that can't be cheated with ANSI. + * yuniko-do.unicode (literally unicode.unicode) */ + +/* Use raw hex values to ensure that the output is well-formed UTF-8. It is also more C90 compliant. */ +static const char FILENAME[] = { + (char)0xe3, (char)0x83, (char)0xa6, /* U+30e6: Katakana letter yu */ + (char)0xe3, (char)0x83, (char)0x8b, /* U+30cb: Katakana letter ni */ + (char)0xe3, (char)0x82, (char)0xb3, /* U+30b3: Katakana letter ko */ + (char)0xe3, (char)0x83, (char)0xbc, /* U+30fc: Katakana-Hiragana prolonged sound mark (dash) */ + (char)0xe3, (char)0x83, (char)0x89, /* U+30c9: Katakana letter do */ + '.','u','n','i','c','o','d','e','\0' /* ".unicode" (so we can glob in make clean and .gitignore) */ +}; + +#ifdef _WIN32 +/* The same text as above, but encoded in Windows UTF-16. */ +static const wchar_t WFILENAME[] = { 0x30e6, 0x30cb, 0x30b3, 0x30fc, 0x30c9, L'.', L'u', L'n', L'i', L'c', L'o', L'd', L'e', L'\0' }; +#endif + +int main(void) +{ + FILE *f, *script, *checksum; + + /* Create our Unicode file. Use _wfopen on Windows as fopen doesn't support Unicode filenames. */ +#ifdef _WIN32 + if (!(f = _wfopen(WFILENAME, L"wb"))) return 1; +#else + if (!(f = fopen(FILENAME, "wb"))) return 1; +#endif + fprintf(f, "test\n"); + fclose(f); + + /* XXH64 checksum file with the precalculated checksum for said file. */ + if (!(checksum = fopen("unicode_test.xxh64", "wb"))) + return 1; + fprintf(checksum, "2d7f1808da1fa63c %s\n", FILENAME); + fclose(checksum); + + + /* Create two scripts for both Windows and Unix. */ + + /* Generate a Windows batch script. Always insert CRLF manually. */ + if (!(script = fopen("unicode_test.bat", "wb"))) + return 1; + + /* Disable echoing the commands. We do that ourselves the naive way. */ + fprintf(script, "@echo off\r\n"); + + /* Change to codepage 65001 to enable UTF-8 support. */ + fprintf(script, "chcp 65001 >NUL 2>&1\r\n"); + + /* First test a Unicode filename */ + fprintf(script, "echo Testing filename provided on command line...\r\n"); + fprintf(script, "echo xxhsum.exe \"%s\"\r\n", FILENAME); + fprintf(script, "xxhsum.exe \"%s\"\r\n", FILENAME); + + /* Bail on error */ + fprintf(script, "if %%ERRORLEVEL%% neq 0 (\r\n"); + fprintf(script, " exit /B %%ERRORLEVEL%%\r\n"); + fprintf(script, ")\r\n"); + + /* Then test a checksum file. */ + fprintf(script, "echo Testing a checksum file...\r\n"); + fprintf(script, "echo xxhsum.exe -c unicode_test.xxh64\r\n"); + fprintf(script, "xxhsum.exe -c unicode_test.xxh64\r\n"); + + fprintf(script, "exit /B %%ERRORLEVEL%%\r\n"); + + fclose(script); + + /* Generate a Unix shell script */ + if (!(script = fopen("unicode_test.sh", "wb"))) + return 1; + + fprintf(script, "#!/bin/sh\n"); + /* + * Some versions of MSYS, MinGW and Cygwin do not support UTF-8, and the ones that + * don't may error with something like this: + * + * Error: Could not open '.unicode': No such file or directory. + * + * which is an internal error that happens when it tries to convert MinGW/Cygwin + * paths to Windows paths. + * + * In that case, we bail to cmd.exe and the batch script, which supports UTF-8 + * on Windows 7 and later. + */ + fprintf(script, "case $(uname) in\n"); + /* MinGW/MSYS converts /c to C:\ unless you have a double slash, + * Cygwin does not. */ + fprintf(script, " *CYGWIN*)\n"); + fprintf(script, " exec cmd.exe /c unicode_test.bat\n"); + fprintf(script, " ;;\n"); + fprintf(script, " *MINGW*|*MSYS*)\n"); + fprintf(script, " exec cmd.exe //c unicode_test.bat\n"); + fprintf(script, " ;;\n"); + fprintf(script, "esac\n"); + + /* First test a Unicode filename */ + fprintf(script, "echo Testing filename provided on command line...\n"); + fprintf(script, "echo './xxhsum \"%s\" || exit $?'\n", FILENAME); + fprintf(script, "./xxhsum \"%s\" || exit $?\n", FILENAME); + + /* Then test a checksum file. */ + fprintf(script, "echo Testing a checksum file...\n"); + fprintf(script, "echo './xxhsum -c unicode_test.xxh64 || exit $?'\n"); + fprintf(script, "./xxhsum -c unicode_test.xxh64 || exit $?\n"); + + fclose(script); + + return 0; +} diff --git a/deps/xxHash/tests/multiInclude.c b/deps/xxHash/tests/multiInclude.c new file mode 100644 index 000000000..7d2bc8a9f --- /dev/null +++ b/deps/xxHash/tests/multiInclude.c @@ -0,0 +1,66 @@ +/* + * Multi-include test program + * Validates that xxhash.h can be included multiple times and in any order + * + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#include /* printf */ + +/* Normal include, gives access to public symbols */ +#include "../xxhash.h" + +/* + * Advanced include, gives access to experimental symbols + * This test ensure that xxhash.h can be included multiple times and in any + * order. This order is more difficult: Without care, the declaration of + * experimental symbols could be skipped. + */ +#define XXH_STATIC_LINKING_ONLY +#include "../xxhash.h" + +/* + * Inlining: Re-define all identifiers, keep them private to the unit. + * Note: Without specific efforts, the identifier names would collide. + * + * To be linked with and without xxhash.o to test the symbol's presence and + * naming collisions. + */ +#define XXH_INLINE_ALL +#include "../xxhash.h" + + +int main(void) +{ + XXH3_state_t state; /* part of experimental API */ + + XXH3_64bits_reset(&state); + const char input[] = "Hello World !"; + + XXH3_64bits_update(&state, input, sizeof(input)); + + XXH64_hash_t const h = XXH3_64bits_digest(&state); + printf("hash '%s': %08x%08x \n", input, (unsigned)(h >> 32), (unsigned)h); + + return 0; +} diff --git a/deps/xxHash/tests/ppc_define.c b/deps/xxHash/tests/ppc_define.c new file mode 100644 index 000000000..d94c2c724 --- /dev/null +++ b/deps/xxHash/tests/ppc_define.c @@ -0,0 +1,62 @@ +/* + * Multi-include test program + * ensure that pixel, bool and vector are not redefined + * + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* gcc's altivec.h, included for the VSX code path, + * may, in some circumstances, redefine + * bool, vector and pixel keywords. + * + * This unit checks if it happens. + * It's a compile test. + * The test is mostly meaningful for PPC target using altivec.h + * hence XXH_VECTOR == XXH_VSX + */ + +#define BOOL_VALUE 32123456 +#define bool BOOL_VALUE + +#define VECTOR_VALUE 374464784 +#define vector VECTOR_VALUE + +#define PIXEL_VALUE 5846841 +#define pixel PIXEL_VALUE + +#define XXH_INLINE_ALL +#include "../xxhash.h" + +#if (bool != BOOL_VALUE) +# error "bool macro was redefined !" +#endif + +#if (vector != VECTOR_VALUE) +# error "vector macro was redefined !" +#endif + +#if (pixel != PIXEL_VALUE) +# error "pixel macro was redefined !" +#endif + +int g_nonEmptyUnit = 0; diff --git a/deps/xxHash/xxh3.h b/deps/xxHash/xxh3.h new file mode 100644 index 000000000..7e83e6418 --- /dev/null +++ b/deps/xxHash/xxh3.h @@ -0,0 +1,55 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Development source file for `xxh3` + * Copyright (C) 2019-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * Note: This file used to host the source code of XXH3_* variants. + * during the development period. + * The source code is now properly integrated within xxhash.h. + * + * xxh3.h is no longer useful, + * but it is still provided for compatibility with source code + * which used to include it directly. + * + * Programs are now highly discourage to include xxh3.h. + * Include `xxhash.h` instead, which is the officially supported interface. + * + * In the future, xxh3.h will start to generate warnings, then errors, + * then it will be removed from source package and from include directory. + */ + +/* Simulate the same impact as including the old xxh3.h source file */ + +#define XXH_INLINE_ALL +#include "xxhash.h" diff --git a/deps/xxHash/xxh_x86dispatch.c b/deps/xxHash/xxh_x86dispatch.c new file mode 100644 index 000000000..a618ae898 --- /dev/null +++ b/deps/xxHash/xxh_x86dispatch.c @@ -0,0 +1,749 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +#if defined (__cplusplus) +extern "C" { +#endif + +/* + * Dispatcher code for XXH3 on x86-based targets. + */ +#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)) +# error "Dispatching is currently only supported on x86 and x86_64." +#endif + +#ifndef __GNUC__ +# error "Dispatching requires __attribute__((__target__)) capability" +#endif + +#define XXH_DISPATCH_AVX2 /* enable dispatch towards AVX2 */ +#define XXH_DISPATCH_AVX512 /* enable dispatch towards AVX512 */ + +#ifdef XXH_DISPATCH_DEBUG +/* debug logging */ +# include +# define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); } +#else +# define XXH_debugPrint(str) ((void)0) +# undef NDEBUG /* avoid redefinition */ +# define NDEBUG +#endif +#include + +#if defined(__GNUC__) +# include /* sse2 */ +# include /* avx2 */ +#elif defined(_MSC_VER) +# include +#endif + +#define XXH_INLINE_ALL +#define XXH_X86DISPATCH +#define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) +#define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) +#define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) +#include "xxhash.h" + +/* + * Modified version of Intel's guide + * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ +#if defined(_MSC_VER) +# include +#endif + +/* + * Support both AT&T and Intel dialects + * + * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if + * compiled with -masm=intel. Instead, it supports dialect switching with + * curly braces: { AT&T syntax | Intel syntax } + * + * Clang's integrated assembler automatically converts AT&T syntax to Intel if + * needed, making the dialect switching useless (it isn't even supported). + * + * Note: Comments are written in the inline assembly itself. + */ +#ifdef __clang__ +# define I_ATT(intel, att) att "\n\t" +#else +# define I_ATT(intel, att) "{" att "|" intel "}\n\t" +#endif + + +static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) +{ +#if defined(_MSC_VER) + __cpuidex(abcd, eax, ecx); +#else + xxh_u32 ebx, edx; +# if defined(__i386__) && defined(__PIC__) + __asm__( + "# Call CPUID\n\t" + "#\n\t" + "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t" + "# EBX, so we use EDI instead.\n\t" + I_ATT("mov edi, ebx", "movl %%ebx, %%edi") + I_ATT("cpuid", "cpuid" ) + I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi") + : "=D" (ebx), +# else + __asm__( + "# Call CPUID\n\t" + I_ATT("cpuid", "cpuid") + : "=b" (ebx), +# endif + "+a" (eax), "+c" (ecx), "=d" (edx)); + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +#endif +} + +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) +/* + * While the CPU may support AVX2, the operating system might not properly save + * the full YMM/ZMM registers. + * + * xgetbv is used for detecting this: Any compliant operating system will define + * a set of flags in the xcr0 register indicating how it saves the AVX registers. + * + * You can manually disable this flag on Windows by running, as admin: + * + * bcdedit.exe /set xsavedisable 1 + * + * and rebooting. Run the same command with 0 to re-enable it. + */ +static xxh_u64 XXH_xgetbv(void) +{ +#if defined(_MSC_VER) + return _xgetbv(0); /* min VS2010 SP1 compiler is required */ +#else + xxh_u32 xcr0_lo, xcr0_hi; + __asm__( + "# Call XGETBV\n\t" + "#\n\t" + "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t" + "# the XGETBV opcode, so we encode it by hand instead.\n\t" + "# See for details.\n\t" + ".byte 0x0f, 0x01, 0xd0\n\t" + : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | ((xxh_u64)xcr0_hi << 32); +#endif +} +#endif + +#define SSE2_CPUID_MASK (1 << 26) +#define OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) +#define AVX2_CPUID_MASK (1 << 5) +#define AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) +#define AVX512F_CPUID_MASK (1 << 16) +#define AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) + +/* Returns the best XXH3 implementation */ +static int XXH_featureTest(void) +{ + xxh_u32 abcd[4]; + xxh_u32 max_leaves; + int best = XXH_SCALAR; +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) + xxh_u64 xgetbv_val; +#endif +#if defined(__GNUC__) && defined(__i386__) + xxh_u32 cpuid_supported; + __asm__( + "# For the sake of ruthless backwards compatibility, check if CPUID\n\t" + "# is supported in the EFLAGS on i386.\n\t" + "# This is not necessary on x86_64 - CPUID is mandatory.\n\t" + "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t" + "# for the CPUID instruction. If a software procedure can set and\n\t" + "# clear this flag, the processor executing the procedure supports\n\t" + "# the CPUID instruction.\n\t" + "# \n\t" + "#\n\t" + "# Routine is from .\n\t" + + "# Save EFLAGS\n\t" + I_ATT("pushfd", "pushfl" ) + "# Store EFLAGS\n\t" + I_ATT("pushfd", "pushfl" ) + "# Invert the ID bit in stored EFLAGS\n\t" + I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") + "# Load stored EFLAGS (with ID bit inverted)\n\t" + I_ATT("popfd", "popfl" ) + "# Store EFLAGS again (ID bit may or not be inverted)\n\t" + I_ATT("pushfd", "pushfl" ) + "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t" + I_ATT("pop eax", "popl %%eax" ) + "# eax = whichever bits were changed\n\t" + I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) + "# Restore original EFLAGS\n\t" + I_ATT("popfd", "popfl" ) + "# eax = zero if ID bit can't be changed, else non-zero\n\t" + I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) + : "=a" (cpuid_supported) :: "cc"); + + if (XXH_unlikely(!cpuid_supported)) { + XXH_debugPrint("CPUID support is not detected!"); + return best; + } + +#endif + /* Check how many CPUID pages we have */ + XXH_cpuid(0, 0, abcd); + max_leaves = abcd[0]; + + /* Shouldn't happen on hardware, but happens on some QEMU configs. */ + if (XXH_unlikely(max_leaves == 0)) { + XXH_debugPrint("Max CPUID leaves == 0!"); + return best; + } + + /* Check for SSE2, OSXSAVE and xgetbv */ + XXH_cpuid(1, 0, abcd); + + /* + * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt. + */ + if (XXH_unlikely((abcd[3] & SSE2_CPUID_MASK) != SSE2_CPUID_MASK)) + return best; + + XXH_debugPrint("SSE2 support detected."); + + best = XXH_SSE2; +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) + /* Make sure we have enough leaves */ + if (XXH_unlikely(max_leaves < 7)) + return best; + + /* Test for OSXSAVE and XGETBV */ + if ((abcd[2] & OSXSAVE_CPUID_MASK) != OSXSAVE_CPUID_MASK) + return best; + + /* CPUID check for AVX features */ + XXH_cpuid(7, 0, abcd); + + xgetbv_val = XXH_xgetbv(); +#if defined(XXH_DISPATCH_AVX2) + /* Validate that AVX2 is supported by the CPU */ + if ((abcd[1] & AVX2_CPUID_MASK) != AVX2_CPUID_MASK) + return best; + + /* Validate that the OS supports YMM registers */ + if ((xgetbv_val & AVX2_XGETBV_MASK) != AVX2_XGETBV_MASK) { + XXH_debugPrint("AVX2 supported by the CPU, but not the OS."); + return best; + } + + /* AVX2 supported */ + XXH_debugPrint("AVX2 support detected."); + best = XXH_AVX2; +#endif +#if defined(XXH_DISPATCH_AVX512) + /* Check if AVX512F is supported by the CPU */ + if ((abcd[1] & AVX512F_CPUID_MASK) != AVX512F_CPUID_MASK) { + XXH_debugPrint("AVX512F not supported by CPU"); + return best; + } + + /* Validate that the OS supports ZMM registers */ + if ((xgetbv_val & AVX512F_XGETBV_MASK) != AVX512F_XGETBV_MASK) { + XXH_debugPrint("AVX512F supported by the CPU, but not the OS."); + return best; + } + + /* AVX512F supported */ + XXH_debugPrint("AVX512F support detected."); + best = XXH_AVX512; +#endif +#endif + return best; +} + + +/* === Vector implementations === */ + +/* === XXH3, default variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_default_scalar(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_default_sse2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_default_avx2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_default_avx512(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH3, Seeded variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); +} +#endif + +/* === XXH3, Secret variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH3 update variants === */ + +XXH_NO_INLINE XXH_errorcode +XXH3_64bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode +XXH3_64bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode +XXH3_64bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode +XXH3_64bits_update_avx512(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 default variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_default_scalar(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_default_sse2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_default_avx2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_default_avx512(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 Secret variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 Seeded variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); +} +#endif + +/* === XXH128 update variants === */ + +XXH_NO_INLINE XXH_errorcode +XXH3_128bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode +XXH3_128bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode +XXH3_128bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode +XXH3_128bits_update_avx512(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* ==== Dispatchers ==== */ + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); + +typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong64_default hashLong64_default; + XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed; + XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret; + XXH3_dispatchx86_update update; +} dispatchFunctions_s; + +static dispatchFunctions_s g_dispatch = { NULL, NULL, NULL, NULL}; + +#define NB_DISPATCHES 4 +static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = { + /* scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_64bits_update_scalar }, + /* sse2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_64bits_update_sse2 }, +#ifdef XXH_DISPATCH_AVX2 + /* avx2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_64bits_update_avx2 }, +#else + /* avx2 */ { NULL, NULL, NULL, NULL }, +#endif +#ifdef XXH_DISPATCH_AVX512 + /* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_64bits_update_avx512 } +#else + /* avx512 */ { NULL, NULL, NULL, NULL } +#endif +}; + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong128_default hashLong128_default; + XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed; + XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret; + XXH3_dispatchx86_update update; +} dispatch128Functions_s; + +static dispatch128Functions_s g_dispatch128 = { NULL, NULL, NULL, NULL }; + +static const dispatch128Functions_s k_dispatch128[NB_DISPATCHES] = { + /* scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_128bits_update_scalar }, + /* sse2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_128bits_update_sse2 }, +#ifdef XXH_DISPATCH_AVX2 + /* avx2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_128bits_update_avx2 }, +#else + /* avx2 */ { NULL, NULL, NULL, NULL }, +#endif +#ifdef XXH_DISPATCH_AVX512 + /* avx512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_128bits_update_avx512 } +#else + /* avx512 */ { NULL, NULL, NULL, NULL } +#endif +}; + +static void setDispatch(void) +{ + int vecID = XXH_featureTest(); + XXH_STATIC_ASSERT(XXH_AVX512 == NB_DISPATCHES-1); + assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); +#ifndef XXH_DISPATCH_AVX512 + assert(vecID != XXH_AVX512); +#endif +#ifndef XXH_DISPATCH_AVX2 + assert(vecID != XXH_AVX2); +#endif + g_dispatch = k_dispatch[vecID]; + g_dispatch128 = k_dispatch128[vecID]; +} + + +/* ==== XXH3 public functions ==== */ + +static XXH64_hash_t +XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (g_dispatch.hashLong64_default == NULL) setDispatch(); + return g_dispatch.hashLong64_default(input, len); +} + +XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (g_dispatch.hashLong64_seed == NULL) setDispatch(); + return g_dispatch.hashLong64_seed(input, len, seed64); +} + +XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)seed64; + if (g_dispatch.hashLong64_secret == NULL) setDispatch(); + return g_dispatch.hashLong64_secret(input, len, secret, secretLen); +} + +XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); +} + +XXH_errorcode +XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) +{ + if (g_dispatch.update == NULL) setDispatch(); + return g_dispatch.update(state, (const xxh_u8*)input, len); +} + + +/* ==== XXH128 public functions ==== */ + +static XXH128_hash_t +XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (g_dispatch128.hashLong128_default == NULL) setDispatch(); + return g_dispatch128.hashLong128_default(input, len); +} + +XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (g_dispatch128.hashLong128_seed == NULL) setDispatch(); + return g_dispatch128.hashLong128_seed(input, len, seed64); +} + +XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; + if (g_dispatch128.hashLong128_secret == NULL) setDispatch(); + return g_dispatch128.hashLong128_secret(input, len, secret, secretLen); +} + +XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection); +} + +XXH_errorcode +XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) +{ + if (g_dispatch128.update == NULL) setDispatch(); + return g_dispatch128.update(state, (const xxh_u8*)input, len); +} + +#if defined (__cplusplus) +} +#endif diff --git a/deps/xxHash/xxh_x86dispatch.h b/deps/xxHash/xxh_x86dispatch.h new file mode 100644 index 000000000..6bc17bcbb --- /dev/null +++ b/deps/xxHash/xxh_x86dispatch.h @@ -0,0 +1,86 @@ +/* + * xxHash - XXH3 Dispatcher for x86-based targets + * Copyright (C) 2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#ifndef XXH_X86DISPATCH_H_13563687684 +#define XXH_X86DISPATCH_H_13563687684 + +#include "xxhash.h" /* XXH64_hash_t, XXH3_state_t */ + +#if defined (__cplusplus) +extern "C" { +#endif + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len); + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len); + +#if defined (__cplusplus) +} +#endif + + +/* automatic replacement of XXH3 functions. + * can be disabled by setting XXH_DISPATCH_DISABLE_REPLACE */ +#ifndef XXH_DISPATCH_DISABLE_REPLACE + +# undef XXH3_64bits +# define XXH3_64bits XXH3_64bits_dispatch +# undef XXH3_64bits_withSeed +# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch +# undef XXH3_64bits_withSecret +# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch +# undef XXH3_64bits_update +# define XXH3_64bits_update XXH3_64bits_update_dispatch + +# undef XXH128 +# define XXH128 XXH3_128bits_withSeed_dispatch +# define XXH3_128bits XXH3_128bits_dispatch +# undef XXH3_128bits +# define XXH3_128bits XXH3_128bits_dispatch +# undef XXH3_128bits_withSeed +# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch +# undef XXH3_128bits_withSecret +# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch +# undef XXH3_128bits_update +# define XXH3_128bits_update XXH3_128bits_update_dispatch + +#endif /* XXH_DISPATCH_DISABLE_REPLACE */ + +#endif /* XXH_X86DISPATCH_H_13563687684 */ diff --git a/deps/xxHash/xxhash.c b/deps/xxHash/xxhash.c index da06ea72b..0fae88c5d 100644 --- a/deps/xxHash/xxhash.c +++ b/deps/xxHash/xxhash.c @@ -1,1029 +1,43 @@ /* -* xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet -* -* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are -* met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following disclaimer -* in the documentation and/or other materials provided with the -* distribution. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -* You can contact the author at : -* - xxHash homepage: http://www.xxhash.com -* - xxHash source repository : https://github.com/Cyan4973/xxHash -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7S__) )) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. - * When this macro is enabled, xxHash actively checks input for null pointer. - * It it is, result for null input pointers is the same as a null-length input. + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ -# define XXH_ACCEPT_NULL_INPUT_POINTER 0 -#endif -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; - * set it to 0 when the input is guaranteed to be aligned, - * or when alignment doesn't matter for performance. +/* + * xxhash.c instantiates functions defined in xxhash.h */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif +#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ +#define XXH_IMPLEMENTATION /* access definitions */ -/* ************************************* -* Includes & Memory related functions -***************************************/ -/*! Modify the local functions below should you wish to use some other memory routines -* for malloc(), free() */ -#include -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/*! and for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - -#include /* assert */ - -#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" - - -/* ************************************* -* Compiler Specific Options -***************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# define FORCE_INLINE static __forceinline -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; -# endif -#endif - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; } __attribute__((packed)) unalign; -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } - -#else - -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - - -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -#endif - - -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; - -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN -static int XXH_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} -# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() -#endif - - -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} - -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} - - -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ******************************************************************* -* 32-bit hash functions -*********************************************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; - -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} - -/* mix all bits */ -static U32 XXH32_avalanche(U32 h32) -{ - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - return(h32); -} - -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) - -static U32 -XXH32_finalize(U32 h32, const void* ptr, size_t len, - XXH_endianess endian, XXH_alignment align) - -{ - const BYTE* p = (const BYTE*)ptr; -#define PROCESS1 \ - h32 += (*p) * PRIME32_5; \ - p++; \ - h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; - -#define PROCESS4 \ - h32 += XXH_get32bits(p) * PRIME32_3; \ - p+=4; \ - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - - switch(len&15) /* or switch(bEnd - p) */ - { - case 12: PROCESS4; - /* fallthrough */ - case 8: PROCESS4; - /* fallthrough */ - case 4: PROCESS4; - return XXH32_avalanche(h32); - - case 13: PROCESS4; - /* fallthrough */ - case 9: PROCESS4; - /* fallthrough */ - case 5: PROCESS4; - PROCESS1; - return XXH32_avalanche(h32); - - case 14: PROCESS4; - /* fallthrough */ - case 10: PROCESS4; - /* fallthrough */ - case 6: PROCESS4; - PROCESS1; - PROCESS1; - return XXH32_avalanche(h32); - - case 15: PROCESS4; - /* fallthrough */ - case 11: PROCESS4; - /* fallthrough */ - case 7: PROCESS4; - /* fallthrough */ - case 3: PROCESS1; - /* fallthrough */ - case 2: PROCESS1; - /* fallthrough */ - case 1: PROCESS1; - /* fallthrough */ - case 0: return XXH32_avalanche(h32); - } - assert(0); - return h32; /* reaching this point is deemed impossible */ -} - - -FORCE_INLINE U32 -XXH32_endian_align(const void* input, size_t len, U32 seed, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; - -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - - if (len>=16) { - const BYTE* const limit = bEnd - 15; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p < limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) - + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } - - h32 += (U32)len; - - return XXH32_finalize(h32, p, len&15, endian, align); -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, input, len); - return XXH32_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - - -/*====== Hash streaming ======*/ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - /* do not write into reserved, planned to be removed in a future version */ - memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); - return XXH_OK; -} - - -FORCE_INLINE -XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - - if (input==NULL) -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - return XXH_OK; -#else - return XXH_ERROR; -#endif - - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - - -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - -FORCE_INLINE U32 -XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) -{ - U32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) - + XXH_rotl32(state->v2, 7) - + XXH_rotl32(state->v3, 12) - + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); -} - - -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} - - -/*====== Canonical representation ======*/ - -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, remaining comparable across different systems. -*/ - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - - -#ifndef XXH_NO_LONG_LONG - -/* ******************************************************************* -* 64-bit hash functions -*********************************************************************/ - -/*====== Memory access ======*/ - -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint64_t U64; -# else - /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ - typedef unsigned long long U64; -# endif -#endif - - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; -static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } - -#else - -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ - -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap64 _byteswap_uint64 -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap64 __builtin_bswap64 -#else -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} - -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} - - -/*====== xxh64 ======*/ - -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; - -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} - -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} - -static U64 XXH64_avalanche(U64 h64) -{ - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - return h64; -} - - -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -static U64 -XXH64_finalize(U64 h64, const void* ptr, size_t len, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)ptr; - -#define PROCESS1_64 \ - h64 ^= (*p) * PRIME64_5; \ - p++; \ - h64 = XXH_rotl64(h64, 11) * PRIME64_1; - -#define PROCESS4_64 \ - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ - p+=4; \ - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - -#define PROCESS8_64 { \ - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ - p+=8; \ - h64 ^= k1; \ - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ -} - - switch(len&31) { - case 24: PROCESS8_64; - /* fallthrough */ - case 16: PROCESS8_64; - /* fallthrough */ - case 8: PROCESS8_64; - return XXH64_avalanche(h64); - - case 28: PROCESS8_64; - /* fallthrough */ - case 20: PROCESS8_64; - /* fallthrough */ - case 12: PROCESS8_64; - /* fallthrough */ - case 4: PROCESS4_64; - return XXH64_avalanche(h64); - - case 25: PROCESS8_64; - /* fallthrough */ - case 17: PROCESS8_64; - /* fallthrough */ - case 9: PROCESS8_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 29: PROCESS8_64; - /* fallthrough */ - case 21: PROCESS8_64; - /* fallthrough */ - case 13: PROCESS8_64; - /* fallthrough */ - case 5: PROCESS4_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 26: PROCESS8_64; - /* fallthrough */ - case 18: PROCESS8_64; - /* fallthrough */ - case 10: PROCESS8_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 30: PROCESS8_64; - /* fallthrough */ - case 22: PROCESS8_64; - /* fallthrough */ - case 14: PROCESS8_64; - /* fallthrough */ - case 6: PROCESS4_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 27: PROCESS8_64; - /* fallthrough */ - case 19: PROCESS8_64; - /* fallthrough */ - case 11: PROCESS8_64; - PROCESS1_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 31: PROCESS8_64; - /* fallthrough */ - case 23: PROCESS8_64; - /* fallthrough */ - case 15: PROCESS8_64; - /* fallthrough */ - case 7: PROCESS4_64; - /* fallthrough */ - case 3: PROCESS1_64; - /* fallthrough */ - case 2: PROCESS1_64; - /* fallthrough */ - case 1: PROCESS1_64; - /* fallthrough */ - case 0: return XXH64_avalanche(h64); - } - - /* impossible to reach */ - assert(0); - return 0; /* unreachable, but some compilers complain without it */ -} - -FORCE_INLINE U64 -XXH64_endian_align(const void* input, size_t len, U64 seed, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U64 h64; - -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - - } else { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - return XXH64_finalize(h64, p, len, endian, align); -} - - -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_state_t state; - XXH64_reset(&state, seed); - XXH64_update(&state, input, len); - return XXH64_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - -/*====== Hash Streaming ======*/ - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - /* do not write into reserved, planned to be removed in a future version */ - memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); - return XXH_OK; -} - -FORCE_INLINE -XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - - if (input==NULL) -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - return XXH_OK; -#else - return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; - - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} - -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - U64 h64; - - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 /*seed*/ + PRIME64_5; - } - - h64 += (U64) state->total_len; - - return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); -} - -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} - - -/*====== Canonical representation ======*/ - -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} - -#endif /* XXH_NO_LONG_LONG */ diff --git a/deps/xxHash/xxhash.h b/deps/xxHash/xxhash.h index d6bad9433..2d56d23c5 100644 --- a/deps/xxHash/xxhash.h +++ b/deps/xxHash/xxhash.h @@ -1,40 +1,42 @@ /* - xxHash - Extremely Fast Hash algorithm - Header File - Copyright (C) 2012-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : https://github.com/Cyan4973/xxHash -*/ + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ -/* Notice extracted from xxHash homepage : +/* TODO: update */ +/* Notice extracted from xxHash homepage: -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +xxHash is an extremely fast hash algorithm, running at RAM speed limits. It also successfully passes all tests from the SMHasher suite. Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) @@ -57,6 +59,11 @@ Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. +Note: SMHasher's CRC32 implementation is not the fastest one. +Other speed-oriented implementations can be faster, +especially in combination with PCLMUL instruction: +https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735 + A 64-bit version, named XXH64, is available since r35. It offers much better speed, but for 64-bit applications only. Name Speed on 64 bits Speed on 32 bits @@ -64,38 +71,38 @@ XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - #if defined (__cplusplus) extern "C" { #endif - -/* **************************** -* Definitions -******************************/ -#include /* size_t */ -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - /* **************************** - * API modifier + * INLINE mode ******************************/ -/** XXH_INLINE_ALL (and XXH_PRIVATE_API) - * This is useful to include xxhash functions in `static` mode - * in order to inline them, and remove their symbol from the public list. - * Inlining can offer dramatic performance improvement on small keys. - * Methodology : +/*! + * XXH_INLINE_ALL (and XXH_PRIVATE_API) + * Use these build macros to inline xxhash into the target unit. + * Inlining improves performance on small inputs, especially when the length is + * expressed as a compile-time constant: + * + * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html + * + * It also keeps xxHash symbols private to the unit, so they are not exported. + * + * Usage: * #define XXH_INLINE_ALL * #include "xxhash.h" - * `xxhash.c` is automatically included. - * It's not useful to compile and link it as a separate module. + * + * Do not compile and link xxhash.o as a separate object, as it is not useful. */ -#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ + && !defined(XXH_INLINE_ALL_31684351384) + /* this section should be traversed only once */ +# define XXH_INLINE_ALL_31684351384 + /* give access to the advanced API, required to compile implementations */ +# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +# define XXH_STATIC_LINKING_ONLY + /* make all functions private */ +# undef XXH_PUBLIC_API # if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) @@ -103,28 +110,92 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else - /* this version may generate warnings for unused static functions */ + /* note: this version may generate warnings for unused static functions */ # define XXH_PUBLIC_API static # endif -#else -# define XXH_PUBLIC_API /* do nothing */ + + /* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such + * as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ +# ifdef XXH_NAMESPACE +# error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported" + /* + * Note: Alternative: #undef all symbols (it's a pretty large list). + * Without #error: it compiles, but functions are actually not inlined. + */ +# endif +# define XXH_NAMESPACE XXH_INLINE_ + /* + * Some identifiers (enums, type names) are not symbols, but they must + * still be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and is a more dispersed action. + * Meanwhile, renaming can be achieved in a single block + */ +# define XXH_IPREF(Id) XXH_INLINE_ ## Id +# define XXH_OK XXH_IPREF(XXH_OK) +# define XXH_ERROR XXH_IPREF(XXH_ERROR) +# define XXH_errorcode XXH_IPREF(XXH_errorcode) +# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +# define XXH32_state_s XXH_IPREF(XXH32_state_s) +# define XXH32_state_t XXH_IPREF(XXH32_state_t) +# define XXH64_state_s XXH_IPREF(XXH64_state_s) +# define XXH64_state_t XXH_IPREF(XXH64_state_t) +# define XXH3_state_s XXH_IPREF(XXH3_state_s) +# define XXH3_state_t XXH_IPREF(XXH3_state_t) +# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) + /* Ensure the header is parsed again, even if it was previously included */ +# undef XXHASH_H_5627135585666179 +# undef XXHASH_H_STATIC_13879238742 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ -/*! XXH_NAMESPACE, aka Namespace Emulation : - * - * If you want to include _and expose_ xxHash functions from within your own library, - * but also want to avoid symbol collisions with other libraries which may also include xxHash, + + +/* **************************************************************** + * Stable API + *****************************************************************/ +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/* specific declaration modes for Windows */ +#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif + +/*! + * XXH_NAMESPACE, aka Namespace Emulation: * - * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library - * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * If you want to include _and expose_ xxHash functions from within your own + * library, but also want to avoid symbol collisions with other libraries which + * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix + * any public symbol from xxhash library with the value of XXH_NAMESPACE + * (therefore, avoid empty or numeric values). * - * Note that no change is required within the calling program as long as it includes `xxhash.h` : - * regular symbol name will be automatically translated by this header. + * Note that no change is required within the calling program as long as it + * includes `xxhash.h`: Regular symbol names will be automatically translated + * by this header. */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +/* XXH32 */ # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) @@ -134,6 +205,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +/* XXH64 */ # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) @@ -143,6 +215,33 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +/* XXH3_64bits */ +# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) +# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) +# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) +# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) +# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) +# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) +# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) +# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) +# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) +# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) +# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) +# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) +/* XXH3_128bits */ +# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) +# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) +# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) +# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) +# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) +# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) +# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) +# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) +# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) +# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) +# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) +# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) +# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) #endif @@ -150,179 +249,4518 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; * Version ***************************************/ #define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 5 +#define XXH_VERSION_MINOR 8 +#define XXH_VERSION_RELEASE 0 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); +/* **************************** +* Definitions +******************************/ +#include /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + /*-********************************************************************** * 32-bit hash ************************************************************************/ -typedef unsigned int XXH32_hash_t; +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint32_t XXH32_hash_t; +#else +# include +# if UINT_MAX == 0xFFFFFFFFUL + typedef unsigned int XXH32_hash_t; +# else +# if ULONG_MAX == 0xFFFFFFFFUL + typedef unsigned long XXH32_hash_t; +# else +# error "unsupported platform: need a 32-bit type" +# endif +# endif +#endif + +/*! + * XXH32(): + * Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". + * The memory between input & input+length must be valid (allocated and read-accessible). + * "seed" can be used to alter the result predictably. + * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s + * + * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. It provides a superior level of + * dispersion, and greatly reduces the risks of collisions. + */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); + +/******* Streaming *******/ -/*! XXH32() : - Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +/* + * Streaming functions generate the xxHash value from an incrememtal input. + * This method is slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * An XXH state must first be allocated using `XXH*_createState()`. + * + * Start a new hash by initializing the state with a seed using `XXH*_reset()`. + * + * Then, feed the hash state by calling `XXH*_update()` as many times as necessary. + * + * The function returns an error code, with 0 meaning OK, and any other value + * meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using `XXH*_digest()`. + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a + * digest, and generate new hash values later on by invoking `XXH*_digest()`. + * + * When done, release the state using `XXH*_freeState()`. + */ -/*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); +/******* Canonical representation *******/ + /* - * Streaming functions generate the xxHash of an input provided in multiple segments. - * Note that, for small input, they are slower than single-call functions, due to state management. - * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. - * - * XXH state must first be allocated, using XXH*_createState() . + * The default return values from XXH functions are unsigned 32 and 64 bit + * integers. + * This the simplest and fastest format for further post-processing. * - * Start a new hash by initializing state with a seed, using XXH*_reset(). + * However, this leaves open the question of what is the order on the byte level, + * since little and big endian conventions will store the same number differently. * - * Then, feed the hash state by calling XXH*_update() as many times as necessary. - * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - * - * Finally, a hash value can be produced anytime, by using XXH*_digest(). - * This function returns the nn-bits hash as an int or long long. + * The canonical representation settles this issue by mandating big-endian + * convention, the same convention as human-readable numbers (large digits first). * - * It's still possible to continue inserting input into the hash state after a digest, - * and generate some new hashes later on, by calling again XXH*_digest(). + * When writing hash values to storage, sending them over a network, or printing + * them, it's highly recommended to use the canonical representation to ensure + * portability across a wider range of systems, present and future. * - * When done, free XXH state space if it was allocated dynamically. + * The following functions allow transformation of hash values to and from + * canonical format. */ -/*====== Canonical representation ======*/ - typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. - * The canonical representation uses human-readable write convention, aka big-endian (large digits first). - * These functions allow transformation of hash result into and from its canonical format. - * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. - */ - #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bit hash ************************************************************************/ -typedef unsigned long long XXH64_hash_t; +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t XXH64_hash_t; +#else + /* the following type must have a width of 64-bit */ + typedef unsigned long long XXH64_hash_t; +#endif -/*! XXH64() : - Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). -*/ -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); +/*! + * XXH64(): + * Returns the 64-bit hash of sequence of length @length stored at memory + * address @input. + * @seed can be used to alter the result predictably. + * + * This function usually runs faster on 64-bit systems, but slower on 32-bit + * systems (see benchmark). + * + * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. It provides a superior level of + * dispersion, and greatly reduces the risks of collisions. + */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); -/*====== Streaming ======*/ +/******* Streaming *******/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); -/*====== Canonical representation ======*/ -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); -#endif /* XXH_NO_LONG_LONG */ +/*-********************************************************************** +* XXH3 64-bit variant +************************************************************************/ + +/* ************************************************************************ + * XXH3 is a new hash algorithm featuring: + * - Improved speed for both small and large inputs + * - True 64-bit and 128-bit outputs + * - SIMD acceleration + * - Improved 32-bit viability + * + * Speed analysis methodology is explained here: + * + * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * + * In general, expect XXH3 to run about ~2x faster on large inputs and >3x + * faster on small ones compared to XXH64, though exact differences depend on + * the platform. + * + * The algorithm is portable: Like XXH32 and XXH64, it generates the same hash + * on all platforms. + * + * It benefits greatly from SIMD and 64-bit arithmetic, but does not require it. + * + * Almost all 32-bit and 64-bit targets that can run XXH32 smoothly can run + * XXH3 at competitive speeds, even if XXH64 runs slowly. Further details are + * explained in the implementation. + * + * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8, + * ZVector and scalar targets. This can be controlled with the XXH_VECTOR macro. + * + * XXH3 offers 2 variants, _64bits and _128bits. + * When only 64 bits are needed, prefer calling the _64bits variant, as it + * reduces the amount of mixing, resulting in faster speed on small inputs. + * + * It's also generally simpler to manipulate a scalar return type than a struct. + * + * The 128-bit version adds additional strength, but it is slightly slower. + * + * The XXH3 algorithm is still in development. + * The results it produces may still change in future versions. + * + * Results produced by v0.7.x are not comparable with results from v0.7.y. + * However, the API is completely stable, and it can safely be used for + * ephemeral data (local sessions). + * + * Avoid storing values in long-term storage until the algorithm is finalized. + * XXH3's return values will be officially finalized upon reaching v0.8.0. + * + * After which, return values of XXH3 and XXH128 will no longer change in + * future versions. + * + * The API supports one-shot hashing, streaming mode, and custom secrets. + */ -#ifdef XXH_STATIC_LINKING_ONLY +/* XXH3_64bits(): + * default 64-bit variant, using default secret and default seed of 0. + * It's the fastest variant. */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len); -/* ================================================================================================ - This section contains declarations which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - These declarations should only be used with static linking. - Never use them in association with dynamic linking ! -=================================================================================================== */ +/* + * XXH3_64bits_withSeed(): + * This variant generates a custom secret on the fly + * based on default secret altered using the `seed` value. + * While this operation is decently fast, note that it's not completely free. + * Note: seed==0 produces the same results as XXH3_64bits(). + */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); -/* These definitions are only present to allow - * static allocation of XXH state, on stack or in a struct for example. - * Never **ever** use members directly. */ +/* + * XXH3_64bits_withSecret(): + * It's possible to provide any blob of bytes as a "secret" to generate the hash. + * This makes it more difficult for an external actor to prepare an intentional collision. + * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN). + * However, the quality of produced hash values depends on secret's entropy. + * Technically, the secret must look like a bunch of random bytes. + * Avoid "trivial" or structured data such as repeated sequences or a text document. + * Whenever unsure about the "randomness" of the blob of bytes, + * consider relabelling it as a "custom seed" instead, + * and employ "XXH3_generateSecret()" (see below) + * to generate a high entropy secret derived from the custom seed. + */ +#define XXH3_SECRET_SIZE_MIN 136 +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); -#if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include -struct XXH32_state_s { - uint32_t total_len_32; - uint32_t large_len; - uint32_t v1; - uint32_t v2; - uint32_t v3; - uint32_t v4; - uint32_t mem32[4]; - uint32_t memsize; - uint32_t reserved; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH32_state_t */ +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + */ +typedef struct XXH3_state_s XXH3_state_t; +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); +XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state); -struct XXH64_state_s { - uint64_t total_len; - uint64_t v1; - uint64_t v2; - uint64_t v3; - uint64_t v4; - uint64_t mem64[4]; - uint32_t memsize; - uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH64_state_t */ +/* + * XXH3_64bits_reset(): + * Initialize with default parameters. + * digest will be equivalent to `XXH3_64bits()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr); +/* + * XXH3_64bits_reset_withSeed(): + * Generate a custom secret from `seed`, and store it into `statePtr`. + * digest will be equivalent to `XXH3_64bits_withSeed()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +/* + * XXH3_64bits_reset_withSecret(): + * `secret` is referenced, it _must outlive_ the hash streaming session. + * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, + * and the quality of produced hash values depends on secret's entropy + * (secret's content should look like a bunch of random bytes). + * When in doubt about the randomness of a candidate `secret`, + * consider employing `XXH3_generateSecret()` instead (see below). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); -# else +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr); + +/* note : canonical representation of XXH3 is the same as XXH64 + * since they both produce XXH64_hash_t values */ + + +/*-********************************************************************** +* XXH3 128-bit variant +************************************************************************/ + +typedef struct { + XXH64_hash_t low64; + XXH64_hash_t high64; +} XXH128_hash_t; + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + * + * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits(). + * Use already declared XXH3_createState() and XXH3_freeState(). + * + * All reset and streaming functions have same meaning as their 64-bit counterpart. + */ + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr); + +/* Following helper functions make it possible to compare XXH128_hast_t values. + * Since XXH128_hash_t is a structure, this capability is not offered by the language. + * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */ + +/*! + * XXH128_isEqual(): + * Return: 1 if `h1` and `h2` are equal, 0 if they are not. + */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); + +/*! + * XXH128_cmp(): + * + * This comparator is compatible with stdlib's `qsort()`/`bsearch()`. + * + * return: >0 if *h128_1 > *h128_2 + * =0 if *h128_1 == *h128_2 + * <0 if *h128_1 < *h128_2 + */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2); + + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; +XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash); +XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* **************************************************************************** + * This section contains declarations which are not guaranteed to remain stable. + * They may change in future versions, becoming incompatible with a different + * version of the library. + * These declarations should only be used with static linking. + * Never use them in association with dynamic linking! + ***************************************************************************** */ + +/* + * These definitions are only present to allow static allocation + * of XXH states, on stack or in a struct, for example. + * Never **ever** access their members directly. + */ struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; - unsigned memsize; - unsigned reserved; /* never read nor write, might be removed in a future version */ + XXH32_hash_t total_len_32; + XXH32_hash_t large_len; + XXH32_hash_t v1; + XXH32_hash_t v2; + XXH32_hash_t v3; + XXH32_hash_t v4; + XXH32_hash_t mem32[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ -# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; - unsigned memsize; - unsigned reserved[2]; /* never read nor write, might be removed in a future version */ + XXH64_hash_t total_len; + XXH64_hash_t v1; + XXH64_hash_t v2; + XXH64_hash_t v3; + XXH64_hash_t v4; + XXH64_hash_t mem64[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved32; /* required for padding anyway */ + XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ -# endif -# endif +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include +# define XXH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define XXH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define XXH_ALIGN(n) __declspec(align(n)) +#else +# define XXH_ALIGN(n) /* disabled */ +#endif + +/* Old GCC versions only accept the attribute after the type in structures. */ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ + && defined(__GNUC__) +# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) +#else +# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type +#endif + +#define XXH3_INTERNALBUFFER_SIZE 256 +#define XXH3_SECRET_DEFAULT_SIZE 192 +struct XXH3_state_s { + XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); + /* used to store a custom secret generated from a seed */ + XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); + XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); + XXH32_hash_t bufferedSize; + XXH32_hash_t reserved32; + size_t nbStripesSoFar; + XXH64_hash_t totalLen; + size_t nbStripesPerBlock; + size_t secretLimit; + XXH64_hash_t seed; + XXH64_hash_t reserved64; + const unsigned char* extSecret; /* reference to external secret; + * if == NULL, use .customSecret instead */ + /* note: there may be some padding at the end due to alignment on 64 bytes */ +}; /* typedef'd to XXH3_state_t */ + +#undef XXH_ALIGN_MEMBER + +/* When the XXH3_state_t structure is merely emplaced on stack, + * it should be initialized with XXH3_INITSTATE() or a memset() + * in case its first reset uses XXH3_NNbits_reset_withSeed(). + * This init can be omitted if the first reset uses default or _withSecret mode. + * This operation isn't necessary when the state is created with XXH3_createState(). + * Note that this doesn't prepare the state for a streaming operation, + * it's still necessary to use XXH3_NNbits_reset*() afterwards. + */ +#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; } + + +/* === Experimental API === */ +/* Symbols defined below must be considered tied to a specific library version. */ + +/* + * XXH3_generateSecret(): + * + * Derive a high-entropy secret from any user-defined content, named customSeed. + * The generated secret can be used in combination with `*_withSecret()` functions. + * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed, + * as it becomes much more difficult for an external actor to guess how to impact the calculation logic. + * + * The function accepts as input a custom seed of any length and any content, + * and derives from it a high-entropy secret of length XXH3_SECRET_DEFAULT_SIZE + * into an already allocated buffer secretBuffer. + * The generated secret is _always_ XXH_SECRET_DEFAULT_SIZE bytes long. + * + * The generated secret can then be used with any `*_withSecret()` variant. + * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`, + * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()` + * are part of this list. They all accept a `secret` parameter + * which must be very long for implementation reasons (>= XXH3_SECRET_SIZE_MIN) + * _and_ feature very high entropy (consist of random-looking bytes). + * These conditions can be a high bar to meet, so + * this function can be used to generate a secret of proper quality. + * + * customSeed can be anything. It can have any size, even small ones, + * and its content can be anything, even stupidly "low entropy" source such as a bunch of zeroes. + * The resulting `secret` will nonetheless provide all expected qualities. + * + * Supplying NULL as the customSeed copies the default secret into `secretBuffer`. + * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior. + */ +XXH_PUBLIC_API void XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize); + + +/* simple short-cut to pre-selected XXH3_128bits variant */ +XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed); + + +#endif /* XXH_NO_LONG_LONG */ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ +# define XXH_IMPLEMENTATION #endif -#endif /* XXH_STATIC_LINKING_ONLY */ +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ -#if defined (__cplusplus) +/* ======================================================================== */ +/* ======================================================================== */ +/* ======================================================================== */ + + +/*-********************************************************************** + * xxHash implementation + *-********************************************************************** + * xxHash's implementation used to be hosted inside xxhash.c. + * + * However, inlining requires implementation to be visible to the compiler, + * hence be included alongside the header. + * Previously, implementation was hosted inside xxhash.c, + * which was then #included when inlining was activated. + * This construction created issues with a few build and install systems, + * as it required xxhash.c to be stored in /include directory. + * + * xxHash implementation is now directly integrated within xxhash.h. + * As a consequence, xxhash.c is no longer needed in /include. + * + * xxhash.c is still available and is still useful. + * In a "normal" setup, when xxhash is not inlined, + * xxhash.h only exposes the prototypes and public symbols, + * while xxhash.c can be built into an object file xxhash.o + * which can then be linked into the final binary. + ************************************************************************/ + +#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ + || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) +# define XXH_IMPLEM_13a8737387 + +/* ************************************* +* Tuning parameters +***************************************/ +/*! + * XXH_FORCE_MEMORY_ACCESS: + * By default, access to unaligned memory is controlled by `memcpy()`, which is + * safe and portable. + * + * Unfortunately, on some target/compiler combinations, the generated assembly + * is sub-optimal. + * + * The below switch allow selection of a different access method + * in the search for improved performance. + * Method 0 (default): + * Use `memcpy()`. Safe and portable. Default. + * Method 1: + * `__attribute__((packed))` statement. It depends on compiler extensions + * and is therefore not portable. + * This method is safe if your compiler supports it, and *generally* as + * fast or faster than `memcpy`. + * Method 2: + * Direct access via cast. This method doesn't depend on the compiler but + * violates the C standard. + * It can generate buggy code on targets which do not support unaligned + * memory accesses. + * But in some circumstances, it's the only known way to get the most + * performance (example: GCC + ARMv6) + * Method 3: + * Byteshift. This can generate the best code on old compilers which don't + * inline small `memcpy()` calls, and it might also be faster on big-endian + * systems which lack a native byteswap instruction. + * See https://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2 > 3) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7))) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*! + * XXH_ACCEPT_NULL_INPUT_POINTER: + * If the input pointer is NULL, xxHash's default behavior is to dereference it, + * triggering a segfault. + * When this macro is enabled, xxHash actively checks the input for a null pointer. + * If it is, the result for null input pointers is the same as a zero-length input. + */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*! + * XXH_FORCE_ALIGN_CHECK: + * This is an important performance trick + * for architectures without decent unaligned memory access performance. + * It checks for input alignment, and when conditions are met, + * uses a "fast path" employing direct 32-bit/64-bit read, + * resulting in _dramatically faster_ read speed. + * + * The check costs one initial branch per hash, which is generally negligible, but not zero. + * Moreover, it's not useful to generate binary for an additional code path + * if memory access uses same instruction for both aligned and unaligned adresses. + * + * In these cases, the alignment check can be removed by setting this macro to 0. + * Then the code will always use unaligned memory access. + * Align check is automatically disabled on x86, x64 & arm64, + * which are platforms known to offer good unaligned memory accesses performance. + * + * This option does not affect XXH3 (only XXH32 and XXH64). + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \ + || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */ +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + +/*! + * XXH_NO_INLINE_HINTS: + * + * By default, xxHash tries to force the compiler to inline almost all internal + * functions. + * + * This can usually improve performance due to reduced jumping and improved + * constant folding, but significantly increases the size of the binary which + * might not be favorable. + * + * Additionally, sometimes the forced inlining can be detrimental to performance, + * depending on the architecture. + * + * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the + * compiler full control on whether to inline or not. + * + * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using + * -fno-inline with GCC or Clang, this will automatically be defined. + */ +#ifndef XXH_NO_INLINE_HINTS +# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \ + || defined(__NO_INLINE__) /* -O0, -fno-inline */ +# define XXH_NO_INLINE_HINTS 1 +# else +# define XXH_NO_INLINE_HINTS 0 +# endif +#endif + +/*! + * XXH_REROLL: + * Whether to reroll XXH32_finalize, and XXH64_finalize, + * instead of using an unrolled jump table/if statement loop. + * + * This is automatically defined on -Os/-Oz on GCC and Clang. + */ +#ifndef XXH_REROLL +# if defined(__OPTIMIZE_SIZE__) +# define XXH_REROLL 1 +# else +# define XXH_REROLL 0 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/*! + * Modify the local functions below should you wish to use + * different memory routines for malloc() and free() + */ +#include + +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free(void* p) { free(p); } + +/*! and for memcpy() */ +#include +static void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); } + +#include /* ULLONG_MAX */ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio warning fix */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif -#endif /* XXHASH_H_5627135585666179 */ +#if XXH_NO_INLINE_HINTS /* disable inlining hints */ +# if defined(__GNUC__) +# define XXH_FORCE_INLINE static __attribute__((unused)) +# else +# define XXH_FORCE_INLINE static +# endif +# define XXH_NO_INLINE static +/* enable inlining hints */ +#elif defined(_MSC_VER) /* Visual Studio */ +# define XXH_FORCE_INLINE static __forceinline +# define XXH_NO_INLINE static __declspec(noinline) +#elif defined(__GNUC__) +# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused)) +# define XXH_NO_INLINE static __attribute__((noinline)) +#elif defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ +# define XXH_FORCE_INLINE static inline +# define XXH_NO_INLINE static +#else +# define XXH_FORCE_INLINE static +# define XXH_NO_INLINE static +#endif + + + +/* ************************************* +* Debug +***************************************/ +/* + * XXH_DEBUGLEVEL is expected to be defined externally, typically via the + * compiler's command line options. The value must be a number. + */ +#ifndef XXH_DEBUGLEVEL +# ifdef DEBUGLEVEL /* backwards compat */ +# define XXH_DEBUGLEVEL DEBUGLEVEL +# else +# define XXH_DEBUGLEVEL 0 +# endif +#endif + +#if (XXH_DEBUGLEVEL>=1) +# include /* note: can still be disabled with NDEBUG */ +# define XXH_ASSERT(c) assert(c) +#else +# define XXH_ASSERT(c) ((void)0) +#endif + +/* note: use after variable declarations */ +#define XXH_STATIC_ASSERT(c) do { enum { XXH_sa = 1/(int)(!!(c)) }; } while (0) + + +/* ************************************* +* Basic Types +***************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t xxh_u8; +#else + typedef unsigned char xxh_u8; +#endif +typedef XXH32_hash_t xxh_u32; + +#ifdef XXH_OLD_NAMES +# define BYTE xxh_u8 +# define U8 xxh_u8 +# define U32 xxh_u32 +#endif + +/* *** Memory access *** */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE32 and XXH_readBE32. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* + * Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware. + */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __pack instructions are safer but compiler specific, hence potentially + * problematic for some compilers. + * + * Currently only defined for GCC and ICC. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +#endif +static xxh_u32 XXH_read32(const void* ptr) +{ + typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign; + return ((const xxh_unalign*)ptr)->u32; +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://stackoverflow.com/a/32095106/646947 + */ +static xxh_u32 XXH_read32(const void* memPtr) +{ + xxh_u32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianess *** */ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/*! + * XXH_CPU_LITTLE_ENDIAN: + * Defined to 1 if the target is little endian, or 0 if it is big endian. + * It can be defined externally, for example on the compiler command line. + * + * If it is not defined, a runtime check (which is usually constant folded) + * is used instead. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +/* + * Try to detect endianness automatically, to avoid the nonstandard behavior + * in `XXH_isLittleEndian()` + */ +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else +/* + * runtime test, presumed to simplify to a constant by compiler + */ +static int XXH_isLittleEndian(void) +{ + /* + * Portable and well-defined behavior. + * Don't use static: it is detrimental to performance. + */ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +# endif +#endif + + + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifdef __has_builtin +# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#else +# define XXH_HAS_BUILTIN(x) 0 +#endif + +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ + && XXH_HAS_BUILTIN(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 +/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +/* + * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. + * + * This is ideal for older compilers which don't inline memcpy. + */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u32)bytePtr[1] << 8) + | ((xxh_u32)bytePtr[2] << 16) + | ((xxh_u32)bytePtr[3] << 24); +} + +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[3] + | ((xxh_u32)bytePtr[2] << 8) + | ((xxh_u32)bytePtr[1] << 16) + | ((xxh_u32)bytePtr[0] << 24); +} + +#else +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u32 +XXH_readLE32_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +static const xxh_u32 XXH_PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static const xxh_u32 XXH_PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static const xxh_u32 XXH_PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static const xxh_u32 XXH_PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static const xxh_u32 XXH_PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ + +#ifdef XXH_OLD_NAMES +# define PRIME32_1 XXH_PRIME32_1 +# define PRIME32_2 XXH_PRIME32_2 +# define PRIME32_3 XXH_PRIME32_3 +# define PRIME32_4 XXH_PRIME32_4 +# define PRIME32_5 XXH_PRIME32_5 +#endif + +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) +{ + acc += input * XXH_PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= XXH_PRIME32_1; +#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* + * UGLY HACK: + * This inline assembly hack forces acc into a normal register. This is the + * only thing that prevents GCC and Clang from autovectorizing the XXH32 + * loop (pragmas and attributes don't work for some resason) without globally + * disabling SSE4.1. + * + * The reason we want to avoid vectorization is because despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on + * newer chips!) making it slightly slower to multiply four integers at + * once compared to four integers independently. Even when pmulld was + * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE + * just to multiply unless doing a long operation. + * + * - Four instructions are required to rotate, + * movqda tmp, v // not required with VEX encoding + * pslld tmp, 13 // tmp <<= 13 + * psrld v, 19 // x >>= 19 + * por v, tmp // x |= tmp + * compared to one for scalar: + * roll v, 13 // reliably fast across the board + * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason + * + * - Instruction level parallelism is actually more beneficial here because + * the SIMD actually serializes this operation: While v1 is rotating, v2 + * can load data, while v3 can multiply. SSE forces them to operate + * together. + * + * How this hack works: + * __asm__("" // Declare an assembly block but don't declare any instructions + * : // However, as an Input/Output Operand, + * "+r" // constrain a read/write operand (+) as a general purpose register (r). + * (acc) // and set acc as the operand + * ); + * + * Because of the 'r', the compiler has promised that seed will be in a + * general purpose register and the '+' says that it will be 'read/write', + * so it has to assume it has changed. It is like volatile without all the + * loads and stores. + * + * Since the argument has to be in a normal register (not an SSE register), + * each time XXH32_round is called, it is impossible to vectorize. + */ + __asm__("" : "+r" (acc)); +#endif + return acc; +} + +/* mix all bits */ +static xxh_u32 XXH32_avalanche(xxh_u32 h32) +{ + h32 ^= h32 >> 15; + h32 *= XXH_PRIME32_2; + h32 ^= h32 >> 13; + h32 *= XXH_PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, align) + +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define XXH_PROCESS1 do { \ + h32 += (*ptr++) * XXH_PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \ +} while (0) + +#define XXH_PROCESS4 do { \ + h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \ +} while (0) + + /* Compact rerolled version */ + if (XXH_REROLL) { + len &= 15; + while (len >= 4) { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(h32); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: XXH_PROCESS4; + /* fallthrough */ + case 8: XXH_PROCESS4; + /* fallthrough */ + case 4: XXH_PROCESS4; + return XXH32_avalanche(h32); + + case 13: XXH_PROCESS4; + /* fallthrough */ + case 9: XXH_PROCESS4; + /* fallthrough */ + case 5: XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(h32); + + case 14: XXH_PROCESS4; + /* fallthrough */ + case 10: XXH_PROCESS4; + /* fallthrough */ + case 6: XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(h32); + + case 15: XXH_PROCESS4; + /* fallthrough */ + case 11: XXH_PROCESS4; + /* fallthrough */ + case 7: XXH_PROCESS4; + /* fallthrough */ + case 3: XXH_PROCESS1; + /* fallthrough */ + case 2: XXH_PROCESS1; + /* fallthrough */ + case 1: XXH_PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + XXH_ASSERT(0); + return h32; /* reaching this point is deemed impossible */ + } +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1 XXH_PROCESS1 +# define PROCESS4 XXH_PROCESS4 +#else +# undef XXH_PROCESS1 +# undef XXH_PROCESS4 +#endif + +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)16; + } +#endif + + if (len>=16) { + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + xxh_u32 v2 = seed + XXH_PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - XXH_PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + XXH_PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + state.v2 = seed + XXH_PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - XXH_PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + XXH_PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/* + * The default return values from XXH functions are unsigned 32 and 64 bit + * integers. + * + * The canonical representation uses big endian convention, the same convention + * as human-readable numbers (large digits first). + * + * This way, hash values can be written into a file or buffer, remaining + * comparable across different systems. + * + * The following functions allow transformation of hash values to and from their + * canonical format. + */ +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + +#ifdef XXH_OLD_NAMES +# define U64 xxh_u64 +#endif + +/*! + * XXH_REROLL_XXH64: + * Whether to reroll the XXH64_finalize() loop. + * + * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a + * performance gain on 64-bit hosts, as only one jump is required. + * + * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit + * registers, and 64-bit arithmetic needs to be simulated, it isn't beneficial + * to unroll. The code becomes ridiculously large (the largest function in the + * binary on i386!), and rerolling it saves anywhere from 3kB to 20kB. It is + * also slightly faster because it fits into cache better and is more likely + * to be inlined by the compiler. + * + * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. + */ +#ifndef XXH_REROLL_XXH64 +# if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \ + || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \ + || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \ + || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \ + || defined(__mips64__) || defined(__mips64)) /* mips64 */ \ + || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */ +# define XXH_REROLL_XXH64 1 +# else +# define XXH_REROLL_XXH64 0 +# endif +#endif /* !defined(XXH_REROLL_XXH64) */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE64 and XXH_readBE64. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __pack instructions are safer, but compiler specific, hence potentially + * problematic for some compilers. + * + * Currently only defined for GCC and ICC. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +#endif +static xxh_u64 XXH_read64(const void* ptr) +{ + typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64; + return ((const xxh_unalign64*)ptr)->u64; +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://stackoverflow.com/a/32095106/646947 + */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + xxh_u64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXH_swap64 (xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u64)bytePtr[1] << 8) + | ((xxh_u64)bytePtr[2] << 16) + | ((xxh_u64)bytePtr[3] << 24) + | ((xxh_u64)bytePtr[4] << 32) + | ((xxh_u64)bytePtr[5] << 40) + | ((xxh_u64)bytePtr[6] << 48) + | ((xxh_u64)bytePtr[7] << 56); +} + +XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[7] + | ((xxh_u64)bytePtr[6] << 8) + | ((xxh_u64)bytePtr[5] << 16) + | ((xxh_u64)bytePtr[4] << 24) + | ((xxh_u64)bytePtr[3] << 32) + | ((xxh_u64)bytePtr[2] << 40) + | ((xxh_u64)bytePtr[1] << 48) + | ((xxh_u64)bytePtr[0] << 56); +} + +#else +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +} + +static xxh_u64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u64 +XXH_readLE64_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) + return XXH_readLE64(ptr); + else + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); +} + + +/******* xxh64 *******/ + +static const xxh_u64 XXH_PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ +static const xxh_u64 XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ +static const xxh_u64 XXH_PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ +static const xxh_u64 XXH_PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ +static const xxh_u64 XXH_PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ + +#ifdef XXH_OLD_NAMES +# define PRIME64_1 XXH_PRIME64_1 +# define PRIME64_2 XXH_PRIME64_2 +# define PRIME64_3 XXH_PRIME64_3 +# define PRIME64_4 XXH_PRIME64_4 +# define PRIME64_5 XXH_PRIME64_5 +#endif + +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) +{ + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; + return acc; +} + +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; +} + +static xxh_u64 XXH64_avalanche(xxh_u64 h64) +{ + h64 ^= h64 >> 33; + h64 *= XXH_PRIME64_2; + h64 ^= h64 >> 29; + h64 *= XXH_PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, align) + +static xxh_u64 +XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define XXH_PROCESS1_64 do { \ + h64 ^= (*ptr++) * XXH_PRIME64_5; \ + h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; \ +} while (0) + +#define XXH_PROCESS4_64 do { \ + h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; \ + ptr += 4; \ + h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; \ +} while (0) + +#define XXH_PROCESS8_64 do { \ + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \ + ptr += 8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; \ +} while (0) + + /* Rerolled version for 32-bit targets is faster and much smaller. */ + if (XXH_REROLL || XXH_REROLL_XXH64) { + len &= 31; + while (len >= 8) { + XXH_PROCESS8_64; + len -= 8; + } + if (len >= 4) { + XXH_PROCESS4_64; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1_64; + --len; + } + return XXH64_avalanche(h64); + } else { + switch(len & 31) { + case 24: XXH_PROCESS8_64; + /* fallthrough */ + case 16: XXH_PROCESS8_64; + /* fallthrough */ + case 8: XXH_PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: XXH_PROCESS8_64; + /* fallthrough */ + case 20: XXH_PROCESS8_64; + /* fallthrough */ + case 12: XXH_PROCESS8_64; + /* fallthrough */ + case 4: XXH_PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: XXH_PROCESS8_64; + /* fallthrough */ + case 17: XXH_PROCESS8_64; + /* fallthrough */ + case 9: XXH_PROCESS8_64; + XXH_PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: XXH_PROCESS8_64; + /* fallthrough */ + case 21: XXH_PROCESS8_64; + /* fallthrough */ + case 13: XXH_PROCESS8_64; + /* fallthrough */ + case 5: XXH_PROCESS4_64; + XXH_PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: XXH_PROCESS8_64; + /* fallthrough */ + case 18: XXH_PROCESS8_64; + /* fallthrough */ + case 10: XXH_PROCESS8_64; + XXH_PROCESS1_64; + XXH_PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: XXH_PROCESS8_64; + /* fallthrough */ + case 22: XXH_PROCESS8_64; + /* fallthrough */ + case 14: XXH_PROCESS8_64; + /* fallthrough */ + case 6: XXH_PROCESS4_64; + XXH_PROCESS1_64; + XXH_PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: XXH_PROCESS8_64; + /* fallthrough */ + case 19: XXH_PROCESS8_64; + /* fallthrough */ + case 11: XXH_PROCESS8_64; + XXH_PROCESS1_64; + XXH_PROCESS1_64; + XXH_PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: XXH_PROCESS8_64; + /* fallthrough */ + case 23: XXH_PROCESS8_64; + /* fallthrough */ + case 15: XXH_PROCESS8_64; + /* fallthrough */ + case 7: XXH_PROCESS4_64; + /* fallthrough */ + case 3: XXH_PROCESS1_64; + /* fallthrough */ + case 2: XXH_PROCESS1_64; + /* fallthrough */ + case 1: XXH_PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + } + /* impossible to reach */ + XXH_ASSERT(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1_64 XXH_PROCESS1_64 +# define PROCESS4_64 XXH_PROCESS4_64 +# define PROCESS8_64 XXH_PROCESS8_64 +#else +# undef XXH_PROCESS1_64 +# undef XXH_PROCESS4_64 +# undef XXH_PROCESS8_64 +#endif + +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)32; + } +#endif + + if (len>=32) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + xxh_u64 v2 = seed + XXH_PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - XXH_PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + XXH_PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*)input, len); + return XXH64_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + +#endif +} + +/******* Hash Streaming *******/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH64_state_t state; /* use a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + state.v2 = seed + XXH_PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - XXH_PRIME64_1; + /* do not write into reserved64, might be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64)); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH64_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + XXH_PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} + + +/******* Canonical representation *******/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + + + +/* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/* === Compiler specifics === */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define XXH_RESTRICT restrict +#else +/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */ +# define XXH_RESTRICT /* disable */ +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define XXH_likely(x) __builtin_expect(x, 1) +# define XXH_unlikely(x) __builtin_expect(x, 0) +#else +# define XXH_likely(x) (x) +# define XXH_unlikely(x) (x) +#endif + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include +# elif defined(__SSE2__) +# include +# elif defined(__ARM_NEON__) || defined(__ARM_NEON) +# define inline __inline__ /* circumvent a clang bug */ +# include +# undef inline +# endif +#elif defined(_MSC_VER) +# include +#endif + +/* + * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while + * remaining a true 64-bit/128-bit hash function. + * + * This is done by prioritizing a subset of 64-bit operations that can be + * emulated without too many steps on the average 32-bit machine. + * + * For example, these two lines seem similar, and run equally fast on 64-bit: + * + * xxh_u64 x; + * x ^= (x >> 47); // good + * x ^= (x >> 13); // bad + * + * However, to a 32-bit machine, there is a major difference. + * + * x ^= (x >> 47) looks like this: + * + * x.lo ^= (x.hi >> (47 - 32)); + * + * while x ^= (x >> 13) looks like this: + * + * // note: funnel shifts are not usually cheap. + * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); + * x.hi ^= (x.hi >> 13); + * + * The first one is significantly faster than the second, simply because the + * shift is larger than 32. This means: + * - All the bits we need are in the upper 32 bits, so we can ignore the lower + * 32 bits in the shift. + * - The shift result will always fit in the lower 32 bits, and therefore, + * we can ignore the upper 32 bits in the xor. + * + * Thanks to this optimization, XXH3 only requires these features to be efficient: + * + * - Usable unaligned access + * - A 32-bit or 64-bit ALU + * - If 32-bit, a decent ADC instruction + * - A 32 or 64-bit multiply with a 64-bit result + * - For the 128-bit variant, a decent byteswap helps short inputs. + * + * The first two are already required by XXH32, and almost all 32-bit and 64-bit + * platforms which can run XXH32 can run XXH3 efficiently. + * + * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one + * notable exception. + * + * First of all, Thumb-1 lacks support for the UMULL instruction which + * performs the important long multiply. This means numerous __aeabi_lmul + * calls. + * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need + * Lo registers, and this shuffling results in thousands more MOVs than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 registers, + * do a 32->64 multiply with UMULL, and the flexible operand allowing free + * shifts is helpful, too. + * + * Therefore, we do a quick sanity check. + * + * If compiling Thumb-1 for a target which supports ARM instructions, we will + * emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably need + * to specify -march, as you likely meant to compile for a newer architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ +#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) +# warning "XXH3 is highly inefficient without ARM or Thumb-2." +#endif + +/* ========================================== + * Vectorization detection + * ========================================== */ +#define XXH_SCALAR 0 /* Portable scalar version */ +#define XXH_SSE2 1 /* SSE2 for Pentium 4 and all x86_64 */ +#define XXH_AVX2 2 /* AVX2 for Haswell and Bulldozer */ +#define XXH_AVX512 3 /* AVX512 for Skylake and Icelake */ +#define XXH_NEON 4 /* NEON for most ARMv7-A and all AArch64 */ +#define XXH_VSX 5 /* VSX and ZVector for POWER8/z13 */ + +#ifndef XXH_VECTOR /* can be defined on command line */ +# if defined(__AVX512F__) +# define XXH_VECTOR XXH_AVX512 +# elif defined(__AVX2__) +# define XXH_VECTOR XXH_AVX2 +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define XXH_VECTOR XXH_SSE2 +# elif defined(__GNUC__) /* msvc support maybe later */ \ + && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \ + && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +# define XXH_VECTOR XXH_NEON +# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \ + || (defined(__s390x__) && defined(__VEC__)) \ + && defined(__GNUC__) /* TODO: IBM XL */ +# define XXH_VECTOR XXH_VSX +# else +# define XXH_VECTOR XXH_SCALAR +# endif +#endif + +/* + * Controls the alignment of the accumulator, + * for compatibility with aligned vector loads, which are usually faster. + */ +#ifndef XXH_ACC_ALIGN +# if defined(XXH_X86DISPATCH) +# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ +# elif XXH_VECTOR == XXH_SCALAR /* scalar */ +# define XXH_ACC_ALIGN 8 +# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ +# define XXH_ACC_ALIGN 32 +# elif XXH_VECTOR == XXH_NEON /* neon */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_VSX /* vsx */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ +# define XXH_ACC_ALIGN 64 +# endif +#endif + +#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ + || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 +# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#else +# define XXH_SEC_ALIGN 8 +#endif + +/* + * UGLY HACK: + * GCC usually generates the best code with -O3 for xxHash. + * + * However, when targeting AVX2, it is overzealous in its unrolling resulting + * in code roughly 3/4 the speed of Clang. + * + * There are other issues, such as GCC splitting _mm256_loadu_si256 into + * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which + * only applies to Sandy and Ivy Bridge... which don't even support AVX2. + * + * That is why when compiling the AVX2 version, it is recommended to use either + * -O2 -mavx2 -march=haswell + * or + * -O2 -mavx2 -mno-avx256-split-unaligned-load + * for decent performance, or to use Clang instead. + * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. + */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC push_options +# pragma GCC optimize("-O2") +#endif + + +#if XXH_VECTOR == XXH_NEON +/* + * NEON's setup for vmlal_u32 is a little more complicated than it is on + * SSE2, AVX2, and VSX. + * + * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast. + * + * To do the same operation, the 128-bit 'Q' register needs to be split into + * two 64-bit 'D' registers, performing this operation:: + * + * [ a | b ] + * | '---------. .--------' | + * | x | + * | .---------' '--------. | + * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ] + * + * Due to significant changes in aarch64, the fastest method for aarch64 is + * completely different than the fastest method for ARMv7-A. + * + * ARMv7-A treats D registers as unions overlaying Q registers, so modifying + * D11 will modify the high half of Q5. This is similar to how modifying AH + * will only affect bits 8-15 of AX on x86. + * + * VZIP takes two registers, and puts even lanes in one register and odd lanes + * in the other. + * + * On ARMv7-A, this strangely modifies both parameters in place instead of + * taking the usual 3-operand form. + * + * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the + * lower and upper halves of the Q register to end up with the high and low + * halves where we want - all in one instruction. + * + * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] } + * + * Unfortunately we need inline assembly for this: Instructions modifying two + * registers at once is not possible in GCC or Clang's IR, and they have to + * create a copy. + * + * aarch64 requires a different approach. + * + * In order to make it easier to write a decent compiler for aarch64, many + * quirks were removed, such as conditional execution. + * + * NEON was also affected by this. + * + * aarch64 cannot access the high bits of a Q-form register, and writes to a + * D-form register zero the high bits, similar to how writes to W-form scalar + * registers (or DWORD registers on x86_64) work. + * + * The formerly free vget_high intrinsics now require a vext (with a few + * exceptions) + * + * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent + * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one + * operand. + * + * The equivalent of the VZIP.32 on the lower and upper halves would be this + * mess: + * + * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] } + * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } + * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * + * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN): + * + * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); + * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * + * This is available on ARMv7-A, but is less efficient than a single VZIP.32. + */ + +/* + * Function-like macro: + * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi) + * { + * outLo = (uint32x2_t)(in & 0xFFFFFFFF); + * outHi = (uint32x2_t)(in >> 32); + * in = UNDEFINED; + * } + */ +# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ + && defined(__GNUC__) \ + && !defined(__aarch64__) && !defined(__arm64__) +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \ + /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \ + /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \ + __asm__("vzip.32 %e0, %f0" : "+w" (in)); \ + (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + } while (0) +# else +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + (outLo) = vmovn_u64 (in); \ + (outHi) = vshrn_n_u64 ((in), 32); \ + } while (0) +# endif +#endif /* XXH_VECTOR == XXH_NEON */ + +/* + * VSX and Z Vector helpers. + * + * This is very messy, and any pull requests to clean this up are welcome. + * + * There are a lot of problems with supporting VSX and s390x, due to + * inconsistent intrinsics, spotty coverage, and multiple endiannesses. + */ +#if XXH_VECTOR == XXH_VSX +# if defined(__s390x__) +# include +# else +/* gcc's altivec.h can have the unwanted consequence to unconditionally + * #define bool, vector, and pixel keywords, + * with bad consequences for programs already using these keywords for other purposes. + * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined. + * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler, + * but it seems that, in some cases, it isn't. + * Force the build macro to be defined, so that keywords are not altered. + */ +# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__) +# define __APPLE_ALTIVEC__ +# endif +# include +# endif + +typedef __vector unsigned long long xxh_u64x2; +typedef __vector unsigned char xxh_u8x16; +typedef __vector unsigned xxh_u32x4; + +# ifndef XXH_VSX_BE +# if defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_VSX_BE 1 +# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ +# warning "-maltivec=be is not recommended. Please use native endianness." +# define XXH_VSX_BE 1 +# else +# define XXH_VSX_BE 0 +# endif +# endif /* !defined(XXH_VSX_BE) */ + +# if XXH_VSX_BE +/* A wrapper for POWER9's vec_revb. */ +# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) +# define XXH_vec_revb vec_revb +# else +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) +{ + xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + return vec_perm(val, val, vByteSwap); +} +# endif +# endif /* XXH_VSX_BE */ + +/* + * Performs an unaligned load and byte swaps it on big endian. + */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) +{ + xxh_u64x2 ret; + memcpy(&ret, ptr, sizeof(xxh_u64x2)); +# if XXH_VSX_BE + ret = XXH_vec_revb(ret); +# endif + return ret; +} + +/* + * vec_mulo and vec_mule are very problematic intrinsics on PowerPC + * + * These intrinsics weren't added until GCC 8, despite existing for a while, + * and they are endian dependent. Also, their meaning swap depending on version. + * */ +# if defined(__s390x__) + /* s390x is always big endian, no issue on this platform */ +# define XXH_vec_mulo vec_mulo +# define XXH_vec_mule vec_mule +# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) +/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */ +# define XXH_vec_mulo __builtin_altivec_vmulouw +# define XXH_vec_mule __builtin_altivec_vmuleuw +# else +/* gcc needs inline assembly */ +/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +# endif /* XXH_vec_mulo, XXH_vec_mule */ +#endif /* XXH_VECTOR == XXH_VSX */ + + +/* prefetch + * can be disabled, by declaring XXH_NO_PREFETCH build macro */ +#if defined(XXH_NO_PREFETCH) +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# else +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* XXH_NO_PREFETCH */ + + +/* ========================================== + * XXH3 default settings + * ========================================== */ + +#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ + +#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) +# error "default keyset is not large enough" +#endif + +/* Pseudorandom secret taken directly from FARSH */ +XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + + +#ifdef XXH_OLD_NAMES +# define kSecret XXH3_kSecret +#endif + +/* + * Calculates a 32-bit to 64-bit long multiply. + * + * Wraps __emulu on MSVC x86 because it tends to call __allmul when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit MULL, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. + * + * XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) + * { + * return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); + * } + */ +#if defined(_MSC_VER) && defined(_M_IX86) +# include +# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#else +/* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ +# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#endif + +/* + * Calculates a 64->128-bit long multiply. + * + * Uses __uint128_t and _umul128 if available, otherwise uses a scalar version. + */ +static XXH128_hash_t +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64)(product); + r128.high64 = (xxh_u64)(product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif +} + +/* + * Does a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + */ +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/* Seems to produce slightly better code on GCC for some reason. */ +XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) +{ + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) +{ + h64 = XXH_xorshift64(h64, 37); + h64 *= 0x165667919E3779F9ULL; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) +{ + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 35) + len ; + h64 *= 0x9FB21C651E98DF25ULL; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. + * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. + * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len < 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(8 <= len && len <= 16); + { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + + XXH_swap64(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. + * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. + * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. + * + * This is not too bad for a non-cryptographic hash function, especially with + * only 64 bit outputs. + * + * The 128-bit variant (which trades some speed for strength) is NOT affected + * by this, although it is always a good idea to use a proper seed if you care + * about strength. + */ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) +{ +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. + * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + __asm__ ("" : "+r" (seed64)); +#endif + { xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input+8); + return XXH3_mul128_fold64( + input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret+8) - seed64) + ); + } +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * XXH_PRIME64_1; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXH3_mix16B(input+48, secret+96, seed); + acc += XXH3_mix16B(input+len-64, secret+112, seed); + } + acc += XXH3_mix16B(input+32, secret+64, seed); + acc += XXH3_mix16B(input+len-48, secret+80, seed); + } + acc += XXH3_mix16B(input+16, secret+32, seed); + acc += XXH3_mix16B(input+len-32, secret+48, seed); + } + acc += XXH3_mix16B(input+0, secret+0, seed); + acc += XXH3_mix16B(input+len-16, secret+16, seed); + + return XXH3_avalanche(acc); + } +} + +#define XXH3_MIDSIZE_MAX 240 + +XXH_NO_INLINE XXH64_hash_t +XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + #define XXH3_MIDSIZE_STARTOFFSET 3 + #define XXH3_MIDSIZE_LASTOFFSET 17 + + { xxh_u64 acc = len * XXH_PRIME64_1; + int const nbRounds = (int)len / 16; + int i; + for (i=0; i<8; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); + } + acc = XXH3_avalanche(acc); + XXH_ASSERT(nbRounds >= 8); +#if defined(__clang__) /* Clang */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. + * In everywhere else, it uses scalar code. + * + * For 64->128-bit multiplies, even if the NEON was 100% optimal, it + * would still be slower than UMAAL (see XXH_mult64to128). + * + * Unfortunately, Clang doesn't handle the long multiplies properly and + * converts them to the nonexistent "vmulq_u64" intrinsic, which is then + * scalarized into an ugly mess of VMOV.32 instructions. + * + * This mess is difficult to avoid without turning autovectorization + * off completely, but they are usually relatively minor and/or not + * worth it to fix. + * + * This loop is the easiest to fix, as unlike XXH32, this pragma + * _actually works_ because it is a loop vectorization instead of an + * SLP vectorization. + */ + #pragma clang loop vectorize(disable) +#endif + for (i=8 ; i < nbRounds; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + return XXH3_avalanche(acc); + } +} + + +/* ======= Long Keys ======= */ + +#define XXH_STRIPE_LEN 64 +#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) + +#ifdef XXH_OLD_NAMES +# define STRIPE_LEN XXH_STRIPE_LEN +# define ACC_NB XXH_ACC_NB +#endif + +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); + memcpy(dst, &v64, sizeof(v64)); +} + +/* Several intrinsic functions below are supposed to accept __int64 as argument, + * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. + */ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) + typedef int64_t xxh_i64; +#else + /* the following type must have a width of 64-bit */ + typedef long long xxh_i64; +#endif + +/* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. + * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as the product. + * + * This means that in the (relatively likely) case of a multiply by zero, the + * original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together in + * the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +#if (XXH_VECTOR == XXH_AVX512) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_AVX512 +# define XXH_TARGET_AVX512 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(64) __m512i* const xacc = (__m512i *) acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512 (input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } +} + +/* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. + * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { XXH_ALIGN(64) __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); + __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, -(xxh_i64)seed64); + + XXH_ALIGN(64) const __m512i* const src = (const __m512i*) XXH3_kSecret; + XXH_ALIGN(64) __m512i* const dest = ( __m512i*) customSecret; + int i; + for (i=0; i < nbRounds; ++i) { + /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*', + * this will warn "discards ‘const’ qualifier". */ + union { + XXH_ALIGN(64) const __m512i* cp; + XXH_ALIGN(64) void* p; + } remote_const_void; + remote_const_void.cp = src + i; + dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_AVX2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_AVX2 +# define XXH_TARGET_AVX2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xsecret = (const __m256i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xsecret = (const __m256i *) secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void)(&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { __m256i const seed = _mm256_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64, -(xxh_i64)seed64, (xxh_i64)seed64); + + XXH_ALIGN(64) const __m256i* const src = (const __m256i*) XXH3_kSecret; + XXH_ALIGN(64) __m256i* dest = ( __m256i*) customSecret; + +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + * The asm hack causes Clang to assume that XXH3_kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. + */ + __asm__("" : "+r" (dest)); +# endif + + /* GCC -O2 need unroll loop manually */ + dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed); + dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed); + dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed); + dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed); + dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed); + dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed); + } +} + +#endif + +#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_SSE2 +# define XXH_TARGET_SSE2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i *) secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); + +# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + // MSVC 32bit mode does not support _mm_set_epi64x before 2015 + XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, -(xxh_i64)seed64 }; + __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); +# else + __m128i const seed = _mm_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64); +# endif + int i; + + XXH_ALIGN(64) const float* const src = (float const*) XXH3_kSecret; + XXH_ALIGN(XXH_SEC_ALIGN) __m128i* dest = (__m128i*) customSecret; +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + __asm__("" : "+r" (dest)); +# endif + + for (i=0; i < nbRounds; ++i) { + dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src+i*4)), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_NEON) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) { + /* data_vec = xinput[i]; */ + uint8x16_t data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key; + uint32x2_t data_key_lo, data_key_hi; + /* xacc[i] += swap(data_vec); */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped = vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + /* data_key = data_vec ^ key_vec; */ + data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (data_key >> 32); + * data_key = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi); + + } + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* xacc = (uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t data_vec = veorq_u64 (acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec)); + + /* xacc[i] *= XXH_PRIME32_1 */ + uint32x2_t data_key_lo, data_key_hi; + /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (xacc[i] >> 32); + * xacc[i] = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + { /* + * prod_hi = (data_key >> 32) * XXH_PRIME32_1; + * + * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will + * incorrectly "optimize" this: + * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b)); + * shifted = vshll_n_u32(tmp, 32); + * to this: + * tmp = "vmulq_u64"(a, b); // no such thing! + * shifted = vshlq_n_u64(tmp, 32); + * + * However, unlike SSE, Clang lacks a 64-bit multiply routine + * for NEON, and it scalarizes two 64-bit multiplies instead. + * + * vmull_u32 has the same timing as vmul_u32, and it avoids + * this bug completely. + * See https://bugs.llvm.org/show_bug.cgi?id=39967 + */ + uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime); + /* xacc[i] = prod_hi << 32; */ + xacc[i] = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime); + } + } } +} + +#endif + +#if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + xxh_u64x2* const xacc = (xxh_u64x2*) acc; /* presumed aligned */ + xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ + xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + xacc[i] += product; + + /* swap high and low halves */ +#ifdef __s390x__ + xacc[i] += vec_permi(data_vec, data_vec, 2); +#else + xacc[i] += vec_xxpermdi(data_vec, data_vec, 2); +#endif + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_u64x2* const xacc = (xxh_u64x2*) acc; + const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret; + /* constants */ + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; + xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } } +} + +#endif + +/* scalar variants - universal */ + +XXH_FORCE_INLINE void +XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const data_val = XXH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8); + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ + xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; + acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; + xacc[i] = acc64; + } +} + +XXH_FORCE_INLINE void +XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + /* + * We need a separate pointer for the hack below, + * which requires a non-const pointer. + * Any decent compiler will optimize this out otherwise. + */ + const xxh_u8* kSecretPtr = XXH3_kSecret; + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + +#if defined(__clang__) && defined(__aarch64__) + /* + * UGLY HACK: + * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are + * placed sequentially, in order, at the top of the unrolled loop. + * + * While MOVK is great for generating constants (2 cycles for a 64-bit + * constant compared to 4 cycles for LDR), long MOVK chains stall the + * integer pipelines: + * I L S + * MOVK + * MOVK + * MOVK + * MOVK + * ADD + * SUB STR + * STR + * By forcing loads from memory (as the asm line causes Clang to assume + * that XXH3_kSecretPtr has been changed), the pipelines are used more + * efficiently: + * I L S + * LDR + * ADD LDR + * SUB STR + * STR + * XXH3_64bits_withSeed, len == 256, Snapdragon 835 + * without hack: 2654.4 MB/s + * with hack: 3202.9 MB/s + */ + __asm__("" : "+r" (kSecretPtr)); +#endif + /* + * Note: in debug mode, this overrides the asm optimization + * and Clang will emit MOVK chains again. + */ + XXH_ASSERT(kSecretPtr == XXH3_kSecret); + + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; + int i; + for (i=0; i < nbRounds; i++) { + /* + * The asm hack causes Clang to assume that kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. + */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; + XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); + XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); + } } +} + + +typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + +#if (XXH_VECTOR == XXH_AVX512) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + +#elif (XXH_VECTOR == XXH_AVX2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + +#elif (XXH_VECTOR == XXH_SSE2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + +#elif (XXH_VECTOR == XXH_NEON) + +#define XXH3_accumulate_512 XXH3_accumulate_512_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_VSX) + +#define XXH3_accumulate_512 XXH3_accumulate_512_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#else /* scalar */ + +#define XXH3_accumulate_512 XXH3_accumulate_512_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#endif + + + +#ifndef XXH_PREFETCH_DIST +# ifdef __clang__ +# define XXH_PREFETCH_DIST 320 +# else +# if (XXH_VECTOR == XXH_AVX512) +# define XXH_PREFETCH_DIST 512 +# else +# define XXH_PREFETCH_DIST 384 +# endif +# endif /* __clang__ */ +#endif /* XXH_PREFETCH_DIST */ + +/* + * XXH3_accumulate() + * Loops over XXH3_accumulate_512(). + * Assumption: nbStripes will not overflow the secret size + */ +XXH_FORCE_INLINE void +XXH3_accumulate( xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes, + XXH3_f_accumulate_512 f_acc512) +{ + size_t n; + for (n = 0; n < nbStripes; n++ ) { + const xxh_u8* const in = input + n*XXH_STRIPE_LEN; + XXH_PREFETCH(in + XXH_PREFETCH_DIST); + f_acc512(acc, + in, + secret + n*XXH_SECRET_CONSUME_RATE); + } +} + +XXH_FORCE_INLINE void +XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512); + + /* last stripe */ + { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); + } } +} + +XXH_FORCE_INLINE xxh_u64 +XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) +{ + return XXH3_mul128_fold64( + acc[0] ^ XXH_readLE64(secret), + acc[1] ^ XXH_readLE64(secret+8) ); +} + +static XXH64_hash_t +XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + size_t i = 0; + + for (i = 0; i < 4; i++) { + result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); +#if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Prevent autovectorization on Clang ARMv7-a. Exact same problem as + * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. + * XXH3_64bits, len == 256, Snapdragon 835: + * without hack: 2063.7 MB/s + * with hack: 2560.7 MB/s + */ + __asm__("" : "+r" (result64)); +#endif + } + + return XXH3_avalanche(result64); +} + +#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ + XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, + const void* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + /* do not align on 8, so that the secret is different from the accumulator */ +#define XXH_SECRET_MERGEACCS_START 11 + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + * Since the function is not inlined, the compiler may not be able to understand that, + * in some scenarios, its `secret` argument is actually a compile time constant. + * This variant enforces that the compiler can detect that, + * and uses this opportunity to streamline the generated code for better performance. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed(const void* input, size_t len, + XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong64_f f_hashLong) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. + */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +} + + +/* === Public entry point === */ + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +} + +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +} + +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); +} + + +/* === XXH3 streaming === */ + +/* + * Malloc's a pointer that is always aligned to align. + * + * This must be freed with `XXH_alignedFree()`. + * + * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte + * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 + * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. + * + * This underalignment previously caused a rather obvious crash which went + * completely unnoticed due to XXH3_createState() not actually being tested. + * Credit to RedSpah for noticing this bug. + * + * The alignment is done manually: Functions like posix_memalign or _mm_malloc + * are avoided: To maintain portability, we would have to write a fallback + * like this anyways, and besides, testing for the existence of library + * functions without relying on external build tools is impossible. + * + * The method is simple: Overallocate, manually align, and store the offset + * to the original behind the returned pointer. + * + * Align must be a power of 2 and 8 <= align <= 128. + */ +static void* XXH_alignedMalloc(size_t s, size_t align) +{ + XXH_ASSERT(align <= 128 && align >= 8); /* range check */ + XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ + XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ + { /* Overallocate to make room for manual realignment and an offset byte */ + xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); + if (base != NULL) { + /* + * Get the offset needed to align this pointer. + * + * Even if the returned pointer is aligned, there will always be + * at least one byte to store the offset to the original pointer. + */ + size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ + /* Add the offset for the now-aligned pointer */ + xxh_u8* ptr = base + offset; + + XXH_ASSERT((size_t)ptr % align == 0); + + /* Store the offset immediately before the returned pointer. */ + ptr[-1] = (xxh_u8)offset; + return ptr; + } + return NULL; + } +} +/* + * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass + * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. + */ +static void XXH_alignedFree(void* p) +{ + if (p != NULL) { + xxh_u8* ptr = (xxh_u8*)p; + /* Get the offset byte we added in XXH_malloc. */ + xxh_u8 offset = ptr[-1]; + /* Free the original malloc'd pointer */ + xxh_u8* base = ptr - offset; + XXH_free(base); + } +} +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) +{ + XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state==NULL) return NULL; + XXH3_INITSTATE(state); + return state; +} + +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) +{ + XXH_alignedFree(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void +XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state) +{ + memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void +XXH3_64bits_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->extSecret = (const unsigned char*)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_64bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_64bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_64bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/* Note : when XXH3_consumeStripes() is invoked, + * there must be a guarantee that at least one more byte must be consumed from input + * so that the function can blindly consume all stripes using the "normal" secret segment */ +XXH_FORCE_INLINE void +XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, + size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, + const xxh_u8* XXH_RESTRICT input, size_t nbStripes, + const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */ + XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); + if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { + /* need a scrambling operation */ + size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr; + size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock; + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512); + f_scramble(acc, secret + secretLimit); + XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512); + *nbStripesSoFarPtr = nbStripesAfterBlock; + } else { + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512); + *nbStripesSoFarPtr += nbStripes; + } +} + +/* + * Both XXH3_64bits_update and XXH3_128bits_update use this routine. + */ +XXH_FORCE_INLINE XXH_errorcode +XXH3_update(XXH3_state_t* state, + const xxh_u8* input, size_t len, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* const bEnd = input + len; + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + + state->totalLen += len; + + if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */ + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. + */ + if (state->bufferedSize) { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(state->acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc512, f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + + /* Consume input by a multiple of internal buffer size */ + if (input+XXH3_INTERNALBUFFER_SIZE < bEnd) { + const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; + do { + XXH3_consumeStripes(state->acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + input, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc512, f_scramble); + input += XXH3_INTERNALBUFFER_SIZE; + } while (inputbuffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + } + XXH_ASSERT(input < bEnd); + + /* Some remaining input (always) : buffer it */ + XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); + state->bufferedSize = (XXH32_hash_t)(bEnd-input); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + + +XXH_FORCE_INLINE void +XXH3_digest_long (XXH64_hash_t* acc, + const XXH3_state_t* state, + const unsigned char* secret) +{ + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. + */ + memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) { + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, + &nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, nbStripes, + secret, state->secretLimit, + XXH3_accumulate_512, XXH3_scrambleAcc); + /* last stripe */ + XXH3_accumulate_512(acc, + state->buffer + state->bufferedSize - XXH_STRIPE_LEN, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } else { /* bufferedSize < XXH_STRIPE_LEN */ + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + XXH3_accumulate_512(acc, + lastStripe, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } +} + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->seed) + return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} + + +#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) + +XXH_PUBLIC_API void +XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize) +{ + XXH_ASSERT(secretBuffer != NULL); + if (customSeedSize == 0) { + memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return; + } + XXH_ASSERT(customSeed != NULL); + + { size_t const segmentSize = sizeof(XXH128_hash_t); + size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize; + XXH128_canonical_t scrambler; + XXH64_hash_t seeds[12]; + size_t segnb; + XXH_ASSERT(nbSegments == 12); + XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */ + XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); + + /* + * Copy customSeed to seeds[], truncating or repeating as necessary. + */ + { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds)); + size_t filled = toFill; + memcpy(seeds, customSeed, toFill); + while (filled < sizeof(seeds)) { + toFill = XXH_MIN(filled, sizeof(seeds) - filled); + memcpy((char*)seeds + filled, seeds, toFill); + filled += toFill; + } } + + /* generate secret */ + memcpy(secretBuffer, &scrambler, sizeof(scrambler)); + for (segnb=1; segnb < nbSegments; segnb++) { + size_t const segmentStart = segnb * segmentSize; + XXH128_canonical_t segment; + XXH128_canonicalFromHash(&segment, + XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) ); + memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment)); + } } +} + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). + */ + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + /* A doubled version of 1to3_64b with different constants. */ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= 0x9FB21C651E98DF25ULL; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); + { XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); + xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche( seed ^ bitfliph); + return h128; + } } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, + const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); + } + acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); + } + acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); + } + acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_NO_INLINE XXH128_hash_t +XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + { XXH128_hash_t acc; + int const nbRounds = (int)len / 32; + int i; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + for (i=0; i<4; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + (32 * i), + seed); + } + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); + XXH_ASSERT(nbRounds >= 4); + for (i=4 ; i < nbRounds; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), + seed); + } + /* last bytes */ + acc = XXH128_mix32B(acc, + input + len - 16, + input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, + 0ULL - seed); + + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)len * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + secretSize + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)len * XXH_PRIME64_2)); + return h128; + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_128bits_internal(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong128_f f_hl128) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_128bits_internal(input, len, 0, + (const xxh_u8*)secret, secretSize, + XXH3_hashLong_128b_withSecret); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, seed, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_withSeed); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH128(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_withSeed(input, len, seed); +} + + +/* === XXH3 128-bit streaming === */ + +/* + * All the functions are actually the same as for 64-bit streaming variant. + * The only difference is the finalizatiom routine. + */ + +static void +XXH3_128bits_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize); +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_128bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_128bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + state->secretLimit + XXH_STRIPE_LEN + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)state->totalLen * XXH_PRIME64_2)); + return h128; + } + } + /* len <= XXH3_MIDSIZE_MAX : short code */ + if (state->seed) + return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} + +/* 128-bit utility functions */ + +#include /* memcmp, memcpy */ + +/* return : 1 is equal, 0 if different */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) +{ + /* note : XXH128_hash_t is compact, it has no padding byte */ + return !(memcmp(&h1, &h2, sizeof(h1))); +} + +/* This prototype is compatible with stdlib's qsort(). + * return : >0 if *h128_1 > *h128_2 + * <0 if *h128_1 < *h128_2 + * =0 if *h128_1 == *h128_2 */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2) +{ + XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; + XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; + int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); + /* note : bets that, in most cases, hash values are different */ + if (hcmp) return hcmp; + return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); +} + + +/*====== Canonical representation ======*/ +XXH_PUBLIC_API void +XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) { + hash.high64 = XXH_swap64(hash.high64); + hash.low64 = XXH_swap64(hash.low64); + } + memcpy(dst, &hash.high64, sizeof(hash.high64)); + memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH128_hashFromCanonical(const XXH128_canonical_t* src) +{ + XXH128_hash_t h; + h.high64 = XXH_readBE64(src); + h.low64 = XXH_readBE64(src->digest + 8); + return h; +} + +/* Pop our optimization override from above */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC pop_options +#endif + +#endif /* XXH_NO_LONG_LONG */ + + +#endif /* XXH_IMPLEMENTATION */ + + +#if defined (__cplusplus) +} +#endif diff --git a/deps/xxHash/xxhsum.1 b/deps/xxHash/xxhsum.1 index 3c8f2c48a..dd17108f1 100644 --- a/deps/xxHash/xxhsum.1 +++ b/deps/xxHash/xxhsum.1 @@ -1,26 +1,20 @@ . -.TH "XXHSUM" "1" "September 2017" "xxhsum 0.6.3" "User Commands" +.TH "XXHSUM" "1" "July 2020" "xxhsum 0.7.4" "User Commands" . .SH "NAME" \fBxxhsum\fR \- print or check xxHash non\-cryptographic checksums . .SH "SYNOPSIS" -\fBxxhsum [